diff --git a/browser/brave_shields/ad_block_service_browsertest.cc b/browser/brave_shields/ad_block_service_browsertest.cc index 72fe4a850f28..7eccc4c4ab3c 100644 --- a/browser/brave_shields/ad_block_service_browsertest.cc +++ b/browser/brave_shields/ad_block_service_browsertest.cc @@ -2292,8 +2292,8 @@ IN_PROC_BROWSER_TEST_F(AdBlockServiceTest, CosmeticFilteringWindowScriptlet) { UpdateAdBlockInstanceWithRules( "b.com##+js(hjt)", "[{" - "\"name\": \"hijacktest\"," - "\"aliases\": [\"hjt\"]," + "\"name\": \"hijacktest.js\"," + "\"aliases\": [\"hjt.js\"]," "\"kind\": {\"mime\": \"application/javascript\"}," "\"content\": \"" + scriptlet_base64 + "\"}]"); @@ -2336,7 +2336,7 @@ IN_PROC_BROWSER_TEST_F(ScriptletDebugLogsFlagEnabledTest, CanDebugSetToTrue) { UpdateAdBlockInstanceWithRules( "b.com##+js(debuggable)", "[{" - "\"name\": \"debuggable\"," + "\"name\": \"debuggable.js\"," "\"aliases\": []," "\"kind\": {\"mime\": \"application/javascript\"}," "\"content\": \"" + @@ -2371,8 +2371,8 @@ IN_PROC_BROWSER_TEST_F(AdBlockServiceTest, CheckForDeAmpPref) { UpdateAdBlockInstanceWithRules( "b.*##+js(deamp)", "[{" - "\"name\": \"deamp\"," - "\"aliases\": [\"deamp\"]," + "\"name\": \"deamp.js\"," + "\"aliases\": [\"deamp.js\"]," "\"kind\": {\"mime\": \"application/javascript\"}," "\"content\": \"" + scriptlet_base64 + "\"}]"); @@ -2409,8 +2409,8 @@ IN_PROC_BROWSER_TEST_F(AdBlockServiceTest, CosmeticFilteringIframeScriptlet) { UpdateAdBlockInstanceWithRules( "b.com##+js(hjt)", "[{" - "\"name\": \"hijacktest\"," - "\"aliases\": [\"hjt\"]," + "\"name\": \"hijacktest.js\"," + "\"aliases\": [\"hjt.js\"]," "\"kind\": {\"mime\": \"application/javascript\"}," "\"content\": \"" + scriptlet_base64 + "\"}]"); diff --git a/components/brave_shields/adblock/rs/BUILD.gn b/components/brave_shields/adblock/rs/BUILD.gn index 88e823a3453d..5e6a290cede7 100644 --- a/components/brave_shields/adblock/rs/BUILD.gn +++ b/components/brave_shields/adblock/rs/BUILD.gn @@ -25,7 +25,7 @@ 
rust_static_library("rust_lib") { cxx_bindings = [ "src/lib.rs" ] deps = [ - "//brave/third_party/rust/adblock/v0_7:lib", + "//brave/third_party/rust/adblock/v0_8:lib", "//brave/third_party/rust/thiserror/v1:lib", "//third_party/rust/serde_json/v1:lib", ] diff --git a/components/brave_shields/adblock/rs/Cargo.toml b/components/brave_shields/adblock/rs/Cargo.toml index 055df347fd10..b7a43ece53bb 100644 --- a/components/brave_shields/adblock/rs/Cargo.toml +++ b/components/brave_shields/adblock/rs/Cargo.toml @@ -4,7 +4,7 @@ version = "0.1.0" edition = "2021" [dependencies] -adblock = { version = "0.7.16", default-features = false, features = ["full-regex-handling", "debug-info", "css-validation"] } +adblock = { version = "0.8.0", default-features = false, features = ["full-regex-handling", "regex-debug-info", "css-validation"] } cxx = "1.0" serde_json = "1.0" thiserror = "1.0" diff --git a/components/brave_shields/adblock/rs/src/convert.rs b/components/brave_shields/adblock/rs/src/convert.rs index d21ecf7140ff..f69f935b0ac6 100644 --- a/components/brave_shields/adblock/rs/src/convert.rs +++ b/components/brave_shields/adblock/rs/src/convert.rs @@ -6,15 +6,14 @@ use std::time::Duration; use crate::ffi::{ - BlockerDebugInfo, BlockerResult, FilterListMetadata, OptionalString, OptionalU16, - RegexDebugEntry, RegexManagerDiscardPolicy, -}; -use adblock::blocker::{ - BlockerDebugInfo as InnerBlockerDebugInfo, BlockerResult as InnerBlockerResult, + BlockerResult, FilterListMetadata, OptionalString, OptionalU16, RegexDebugEntry, + RegexDebugInfo, RegexManagerDiscardPolicy, }; +use adblock::blocker::BlockerResult as InnerBlockerResult; use adblock::lists::{ExpiresInterval, FilterListMetadata as InnerFilterListMetadata}; use adblock::regex_manager::{ RegexDebugEntry as InnerRegexDebugEntry, + RegexDebugInfo as InnerRegexDebugInfo, RegexManagerDiscardPolicy as InnerRegexManagerDiscardPolicy, }; @@ -47,8 +46,8 @@ impl From for RegexDebugEntry { } } -impl From for BlockerDebugInfo 
{ - fn from(info: InnerBlockerDebugInfo) -> Self { +impl From for RegexDebugInfo { + fn from(info: InnerRegexDebugInfo) -> Self { Self { regex_data: info.regex_data.into_iter().map(|e| e.into()).collect(), compiled_regex_count: info.compiled_regex_count, diff --git a/components/brave_shields/adblock/rs/src/engine.rs b/components/brave_shields/adblock/rs/src/engine.rs index 7665c0c1861e..feffb5e187d3 100644 --- a/components/brave_shields/adblock/rs/src/engine.rs +++ b/components/brave_shields/adblock/rs/src/engine.rs @@ -6,15 +6,15 @@ use std::collections::HashSet; use std::str::Utf8Error; -use adblock::engine::Engine as InnerEngine; +use adblock::Engine as InnerEngine; use adblock::lists::FilterSet; use adblock::resources::{MimeType, Resource, ResourceType}; use adblock::url_parser::ResolvesDomain; use cxx::{let_cxx_string, CxxString, CxxVector}; use crate::ffi::{ - resolve_domain_position, BlockerDebugInfo, BlockerResult, BoxEngineResult, - ContentBlockingRulesResult, FilterListMetadata, RegexManagerDiscardPolicy, UnitResult, + resolve_domain_position, BlockerResult, BoxEngineResult, ContentBlockingRulesResult, + FilterListMetadata, RegexDebugInfo, RegexManagerDiscardPolicy, UnitResult, VecStringResult, }; use crate::result::InternalError; @@ -140,12 +140,14 @@ impl Engine { // The following strings are guaranteed to be // UTF-8, so unwrapping directly should be okay. self.engine - .check_network_urls_with_hostnames_subset( - url.to_str().unwrap(), - hostname.to_str().unwrap(), - source_hostname.to_str().unwrap(), - request_type.to_str().unwrap(), - Some(third_party_request), + .check_network_request_subset( + &adblock::request::Request::preparsed( + url.to_str().unwrap(), + hostname.to_str().unwrap(), + source_hostname.to_str().unwrap(), + request_type.to_str().unwrap(), + third_party_request, + ), previously_matched_rule, force_check_exceptions, ) @@ -163,12 +165,14 @@ impl Engine { // The following strings are also UTF-8. 
self.engine .get_csp_directives( - url.to_str().unwrap(), - hostname.to_str().unwrap(), - source_hostname.to_str().unwrap(), - request_type.to_str().unwrap(), - Some(third_party_request), - ) + &adblock::request::Request::preparsed( + url.to_str().unwrap(), + hostname.to_str().unwrap(), + source_hostname.to_str().unwrap(), + request_type.to_str().unwrap(), + third_party_request, + ), + ) .unwrap_or_default() } @@ -188,6 +192,9 @@ impl Engine { aliases: vec![], kind: ResourceType::Mime(MimeType::from(content_type.to_str()?)), content: data.to_string(), + dependencies: vec![], + /// user-added resources require full permissions + permission: adblock::resources::PermissionMask::from_bits(0b11111111), }; Ok(self.engine.add_resource(resource)?) }() @@ -200,7 +207,7 @@ impl Engine { .ok() .and_then(|resources_json| serde_json::from_str::>(resources_json).ok()) .and_then(|resources| { - self.engine.use_resources(&resources); + self.engine.use_resources(resources); Some(()) }) .is_some() @@ -227,8 +234,8 @@ impl Engine { .into() } - pub fn get_debug_info(&self) -> BlockerDebugInfo { - self.engine.get_debug_info().blocker_debug_info.into() + pub fn get_regex_debug_info(&self) -> RegexDebugInfo { + self.engine.get_regex_debug_info().into() } pub fn discard_regex(&mut self, regex_id: u64) { diff --git a/components/brave_shields/adblock/rs/src/lib.rs b/components/brave_shields/adblock/rs/src/lib.rs index 265b633fa521..f1d6d3609422 100644 --- a/components/brave_shields/adblock/rs/src/lib.rs +++ b/components/brave_shields/adblock/rs/src/lib.rs @@ -82,7 +82,7 @@ mod ffi { exceptions: &CxxVector, ) -> VecStringResult; /// Returns the blocker debug info containing regex info. - fn get_debug_info(&self) -> BlockerDebugInfo; + fn get_regex_debug_info(&self) -> RegexDebugInfo; /// Removes a regex entry by the id. fn discard_regex(&mut self, regex_id: u64); /// Sets a discard policy for the regex manager. 
@@ -125,7 +125,7 @@ mod ffi { usage_count: usize, } - struct BlockerDebugInfo { + struct RegexDebugInfo { regex_data: Vec, compiled_regex_count: usize, } diff --git a/components/brave_shields/browser/ad_block_engine.cc b/components/brave_shields/browser/ad_block_engine.cc index 9c832de3dc18..5aa3f42b6551 100644 --- a/components/brave_shields/browser/ad_block_engine.cc +++ b/components/brave_shields/browser/ad_block_engine.cc @@ -174,7 +174,7 @@ bool AdBlockEngine::TagExists(const std::string& tag) { base::Value::Dict AdBlockEngine::GetDebugInfo() { DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); - const auto debug_info_struct = ad_block_client_->get_debug_info(); + const auto debug_info_struct = ad_block_client_->get_regex_debug_info(); base::Value::List regex_list; for (const auto& regex_entry : debug_info_struct.regex_data) { base::Value::Dict regex_info; diff --git a/components/speedreader/rust/lib/BUILD.gn b/components/speedreader/rust/lib/BUILD.gn index 1d0e183b599d..c6a37877b19d 100644 --- a/components/speedreader/rust/lib/BUILD.gn +++ b/components/speedreader/rust/lib/BUILD.gn @@ -17,7 +17,6 @@ rust_static_library("lib") { deps = [ "src/readability:lib", - "//brave/third_party/rust/flate2/v1:lib", "//brave/third_party/rust/html5ever/v0_25:lib", "//brave/third_party/rust/kuchiki/v0_8:lib", "//brave/third_party/rust/lifeguard/v0_6:lib", diff --git a/components/speedreader/rust/lib/Cargo.toml b/components/speedreader/rust/lib/Cargo.toml index 55534fe297d6..c9b108842d33 100644 --- a/components/speedreader/rust/lib/Cargo.toml +++ b/components/speedreader/rust/lib/Cargo.toml @@ -18,7 +18,6 @@ include = [ ] [dependencies] -flate2 = { version = "1.0.25", features = ["rust_backend"], default-features = false } html5ever = "0.25.1" kuchiki = "0.8.1" lol_html = "0.3.0" diff --git a/third_party/rust/Cargo.lock b/third_party/rust/Cargo.lock index 4b0620ee9c48..10f1a864bb3f 100644 --- a/third_party/rust/Cargo.lock +++ b/third_party/rust/Cargo.lock @@ -4,20 +4,18 @@ 
version = 3 [[package]] name = "adblock" -version = "0.7.17" +version = "0.8.0" dependencies = [ "base64", "bitflags", "cssparser 0.28.1", - "flate2", "idna 0.2.3", "itertools", "memchr", "once_cell", "percent-encoding", "regex", - "rmp-serde 0.13.7", - "rmp-serde 0.15.5", + "rmp-serde", "seahash", "selectors 0.23.0", "serde", @@ -307,7 +305,6 @@ dependencies = [ "ed25519-dalek-bip32", "env_logger", "feed-rs", - "flate2", "font-types", "forest_bigint", "futures", @@ -732,14 +729,6 @@ dependencies = [ "syn 1.0.99", ] -[[package]] -name = "flate2" -version = "1.0.25" -dependencies = [ - "crc32fast", - "miniz_oxide", -] - [[package]] name = "fnv" version = "1.0.7" @@ -1723,15 +1712,6 @@ dependencies = [ "paste", ] -[[package]] -name = "rmp-serde" -version = "0.13.7" -dependencies = [ - "byteorder", - "rmp", - "serde", -] - [[package]] name = "rmp-serde" version = "0.15.5" diff --git a/third_party/rust/Cargo.toml b/third_party/rust/Cargo.toml index d489c9aa94e2..104f180e105e 100644 --- a/third_party/rust/Cargo.toml +++ b/third_party/rust/Cargo.toml @@ -60,8 +60,8 @@ wycheproof = "0.4" [dependencies.adblock] default-features = false -version = "0.7" -features = ["full-regex-handling", "debug-info", "css-validation"] +version = "0.8" +features = ["full-regex-handling", "regex-debug-info", "css-validation"] gn-variables-lib = "if (is_ios) {\n features += [\n \"content-blocking\",\n \"serde_json\",\n ]\n deps += [ \"//third_party/rust/serde_json/v1:lib\" ]\n } else {\n features += [\n \"object-pooling\",\n \"lifeguard\",\n \"unsync-regex-caching\",\n ]\n deps += [ \"//brave/third_party/rust/lifeguard/v0_6:lib\" ]\n }\n" [dependencies.bigdecimal] @@ -107,11 +107,6 @@ default-features = false version = "0.10" features = ["color", "regex"] -[dependencies.flate2] -default-features = false -version = "1" -features = ["rust_backend"] - [dependencies.html5ever] version = "0.25" build-script-outputs = ["rules.rs"] @@ -209,8 +204,8 @@ build-script-outputs = ["tables.rs"] 
[dependencies.uuid] version = "0.8" features = ["serde"] -[patch.crates-io.adblock_v0_7] -path = "adblock/v0_7/crate" +[patch.crates-io.adblock_v0_8] +path = "adblock/v0_8/crate" package = "adblock" [patch.crates-io.addchain_v0_2] @@ -513,10 +508,6 @@ package = "ff" path = "ff/v0_13/crate" package = "ff" -[patch.crates-io.flate2_v1] -path = "flate2/v1/crate" -package = "flate2" - [patch.crates-io.fnv_v1] path = "fnv/v1/crate" package = "fnv" @@ -1001,10 +992,6 @@ package = "remove_dir_all" path = "ripemd/v0_1/crate" package = "ripemd" -[patch.crates-io.rmp-serde_v0_13] -path = "rmp_serde/v0_13/crate" -package = "rmp-serde" - [patch.crates-io.rmp-serde_v0_15] path = "rmp_serde/v0_15/crate" package = "rmp-serde" diff --git a/third_party/rust/adblock/v0_7/crate/.cargo_vcs_info.json b/third_party/rust/adblock/v0_7/crate/.cargo_vcs_info.json deleted file mode 100644 index 841d41d48da9..000000000000 --- a/third_party/rust/adblock/v0_7/crate/.cargo_vcs_info.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "git": { - "sha1": "735027feb6b617851732482d42bc673b15e5b968" - }, - "path_in_vcs": "" -} \ No newline at end of file diff --git a/third_party/rust/adblock/v0_7/crate/src/cosmetic_filter_cache.rs b/third_party/rust/adblock/v0_7/crate/src/cosmetic_filter_cache.rs deleted file mode 100644 index 460e0f99ad99..000000000000 --- a/third_party/rust/adblock/v0_7/crate/src/cosmetic_filter_cache.rs +++ /dev/null @@ -1,908 +0,0 @@ -//! Provides behavior related to cosmetic filtering - that is, modifying a page's contents after -//! it's been loaded into a browser. This is primarily used to hide or clean up unwanted page -//! elements that are served inline with the rest of the first-party content from a page, but can -//! also be used to inject JavaScript "scriptlets" that intercept and modify the behavior of -//! scripts on the page at runtime. -//! -//! The primary API exposed by this module is the `CosmeticFilterCache` struct, which stores -//! 
cosmetic filters and allows them to be queried efficiently at runtime for any which may be -//! relevant to a particular page. - -use crate::filters::cosmetic::CosmeticFilter; -use crate::filters::cosmetic::CosmeticFilterMask; -use crate::resources::{Resource, ScriptletResourceStorage}; -use crate::utils::Hash; - -use std::collections::{HashMap, HashSet}; - -use serde::{Deserialize, Serialize}; - -/// Contains cosmetic filter information intended to be used on a particular URL. -/// -/// `hide_selectors` is a set of any CSS selector on the page that should be hidden, i.e. styled as -/// `{ display: none !important; }`. -/// -/// `style_selectors` is a map of CSS selectors on the page to respective non-hide style rules, -/// i.e. any required styles other than `display: none`. -/// -/// `exceptions` is a set of any class or id CSS selectors that should not have generic rules -/// applied. In practice, these should be passed to `class_id_stylesheet` and not used otherwise. -/// -/// `injected_script` is the Javascript code for any scriptlets that should be injected into the -/// page. -/// -/// `generichide` is set to true if there is a corresponding `$generichide` exception network -/// filter. If so, the page should not query for additional generic rules using -/// `hidden_class_id_selectors`. 
-#[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] -pub struct UrlSpecificResources { - pub hide_selectors: HashSet, - pub style_selectors: HashMap>, - pub exceptions: HashSet, - pub injected_script: String, - pub generichide: bool, -} - -impl UrlSpecificResources { - pub fn empty() -> Self { - Self { - hide_selectors: HashSet::new(), - style_selectors: HashMap::new(), - exceptions: HashSet::new(), - injected_script: String::new(), - generichide: false, - } - } -} - -/// Splits the given hostname-specific rules into three collections: -/// - a set of CSS selectors that should be hidden on all pages under the hostname -/// - a mapping from CSS selectors to any additional (i.e. not `display: none`) CSS styles that -/// should be applied to those elements -/// - a list of any scriptlets that should be injected into the page's JavaScript context -fn hostname_specific_rules( - rules: &[&SpecificFilterType], -) -> (HashSet, HashMap>, Vec) { - if rules.is_empty() { - (HashSet::default(), HashMap::default(), vec![]) - } else { - let mut script_rules = Vec::with_capacity(10); - - let mut hide_rules = HashSet::with_capacity(rules.len()); - let mut style_rules: HashMap> = HashMap::with_capacity(rules.len()); - - rules.iter().for_each(|rule| match rule { - SpecificFilterType::Hide(sel) => { - hide_rules.insert(sel.to_owned()); - } - SpecificFilterType::Style(sel, style) => { - if let Some(entry) = style_rules.get_mut(sel) { - entry.push(style.to_owned()); - } else { - style_rules.insert(sel.to_owned(), vec![style.to_owned()]); - } - } - SpecificFilterType::ScriptInject(sel) => { - script_rules.push(sel.to_owned()); - } - _ => unreachable!(), - }); - - (hide_rules, style_rules, script_rules) - } -} - -/// The main engine driving cosmetic filtering. -/// -/// There are two primary methods that should be considered when using this in a browser: -/// `hidden_class_id_selectors`, and `url_cosmetic_resources`. 
-/// -/// Note that cosmetic filtering is imprecise and that this structure is intenionally designed for -/// efficient querying in the context of a browser, optimizing for low memory usage in the page -/// context and good performance. It is *not* designed to provide a 100% accurate report of what -/// will be blocked on any particular page, although when used correctly, all provided rules and -/// scriptlets should be safe to apply. -#[derive(Deserialize, Serialize)] -pub(crate) struct CosmeticFilterCache { - /// Rules that are just the CSS class of an element to be hidden on all sites, e.g. `##.ad`. - pub(crate) simple_class_rules: HashSet, - /// Rules that are just the CSS id of an element to be hidden on all sites, e.g. `###banner`. - pub(crate) simple_id_rules: HashSet, - /// Rules that are the CSS selector of an element to be hidden on all sites, starting with a - /// class, e.g. `##.ad image`. - pub(crate) complex_class_rules: HashMap>, - /// Rules that are the CSS selector of an element to be hidden on all sites, starting with an - /// id, e.g. `###banner > .text a`. - pub(crate) complex_id_rules: HashMap>, - - pub(crate) specific_rules: HostnameRuleDb, - - /// Rules that are the CSS selector of an element to be hidden on all sites that do not fit - /// into any of the class or id buckets above, e.g. 
`##a[href="https://malware.com"]` - pub(crate) misc_generic_selectors: HashSet, - - pub(crate) scriptlets: ScriptletResourceStorage, -} - -impl CosmeticFilterCache { - pub fn new() -> Self { - Self { - simple_class_rules: HashSet::new(), - simple_id_rules: HashSet::new(), - complex_class_rules: HashMap::new(), - complex_id_rules: HashMap::new(), - - specific_rules: HostnameRuleDb::new(), - - misc_generic_selectors: HashSet::new(), - - scriptlets: Default::default(), - } - } - - pub fn from_rules(rules: Vec) -> Self { - let mut self_ = Self { - simple_class_rules: HashSet::with_capacity(rules.len() / 2), - simple_id_rules: HashSet::with_capacity(rules.len() / 2), - complex_class_rules: HashMap::with_capacity(rules.len() / 2), - complex_id_rules: HashMap::with_capacity(rules.len() / 2), - - specific_rules: HostnameRuleDb::new(), - - misc_generic_selectors: HashSet::with_capacity(rules.len() / 30), - - scriptlets: Default::default(), - }; - - for rule in rules { - self_.add_filter(rule) - } - - self_ - } - - pub fn add_filter(&mut self, rule: CosmeticFilter) { - if rule.has_hostname_constraint() { - if let Some(generic_rule) = rule.hidden_generic_rule() { - self.add_generic_filter(generic_rule); - } - self.specific_rules.store_rule(rule); - } else { - self.add_generic_filter(rule); - } - } - - /// Add a filter, assuming it has already been determined to be a generic rule - fn add_generic_filter(&mut self, rule: CosmeticFilter) { - if rule.mask.contains(CosmeticFilterMask::IS_CLASS_SELECTOR) { - if let Some(key) = &rule.key { - let key = key.clone(); - if rule.mask.contains(CosmeticFilterMask::IS_SIMPLE) { - self.simple_class_rules.insert(key); - } else { - if let Some(bucket) = self.complex_class_rules.get_mut(&key) { - bucket.push(rule.selector); - } else { - self.complex_class_rules.insert(key, vec![rule.selector]); - } - } - } - } else if rule.mask.contains(CosmeticFilterMask::IS_ID_SELECTOR) { - if let Some(key) = &rule.key { - let key = key.clone(); - if 
rule.mask.contains(CosmeticFilterMask::IS_SIMPLE) { - self.simple_id_rules.insert(key); - } else { - if let Some(bucket) = self.complex_id_rules.get_mut(&key) { - bucket.push(rule.selector); - } else { - self.complex_id_rules.insert(key, vec![rule.selector]); - } - } - } - } else { - self.misc_generic_selectors.insert(rule.selector); - } - } - - /// Generic class/id rules are by far the most common type of cosmetic filtering rule, and they - /// apply to all sites. Rather than injecting all of these rules onto every page, which would - /// blow up memory usage, we only inject rules based on classes and ids that actually appear on - /// the page (in practice, a `MutationObserver` is used to identify those elements). We can - /// include rules like `.a-class div#ads > .advertisement`, keyed by the `.a-class` selector, - /// since we know that this rule cannot possibly apply unless there is an `.a-class` element on - /// the page. - /// - /// This method returns all of the generic CSS selectors of elements to hide (i.e. with a - /// `display: none !important` CSS rule) that could possibly be or become relevant to the page - /// given the new classes and ids that have appeared on the page. It guarantees that it will be - /// safe to hide those elements on a particular page by taking into account the page's - /// hostname-specific set of exception rules. - /// - /// The exceptions should be returned directly as they appear in the page's - /// `UrlSpecificResources`. The exceptions, along with the set of already-seen classes and ids, - /// must be cached externally as the cosmetic filtering subsystem here is designed to be - /// stateless with regard to active page sessions. 
- pub fn hidden_class_id_selectors( - &self, - classes: &[String], - ids: &[String], - exceptions: &HashSet, - ) -> Vec { - let mut simple_classes = vec![]; - let mut simple_ids = vec![]; - let mut complex_selectors = vec![]; - - classes.iter().for_each(|class| { - if self.simple_class_rules.contains(class) - && !exceptions.contains(&format!(".{}", class)) - { - simple_classes.push(class); - } - if let Some(bucket) = self.complex_class_rules.get(class) { - complex_selectors.extend(bucket.iter().filter(|sel| !exceptions.contains(*sel))); - } - }); - ids.iter().for_each(|id| { - if self.simple_id_rules.contains(id) && !exceptions.contains(&format!("#{}", id)) { - simple_ids.push(id); - } - if let Some(bucket) = self.complex_id_rules.get(id) { - complex_selectors.extend(bucket.iter().filter(|sel| !exceptions.contains(*sel))); - } - }); - - if simple_classes.is_empty() && simple_ids.is_empty() && complex_selectors.is_empty() { - return vec![]; - } - - simple_classes - .into_iter() - .map(|class| format!(".{}", class)) - .chain(simple_ids.into_iter().map(|id| format!("#{}", id))) - .chain(complex_selectors.into_iter().cloned()) - .collect::>() - } - - /// Any rules that can't be handled by `hidden_class_id_selectors` are returned by - /// `hostname_cosmetic_resources`. As soon as a page navigation is committed, this method - /// should be queried to get the initial set of cosmetic filtering operations to apply to the - /// page. This provides any rules specifying elements to hide by selectors that are too complex - /// to be returned by `hidden_class_id_selectors` (i.e. not directly starting with a class or - /// id selector, like `div[class*="Ads"]`), or any rule that is only applicable to a particular - /// hostname or set of hostnames (like `example.com##.a-class`). The first category is always - /// injected into every page, and makes up a relatively small number of rules in practice. 
- pub fn hostname_cosmetic_resources( - &self, - hostname: &str, - generichide: bool, - ) -> UrlSpecificResources { - let domain_str = { - let (start, end) = crate::url_parser::get_host_domain(hostname); - &hostname[start..end] - }; - - let (request_entities, request_hostnames) = hostname_domain_hashes(hostname, domain_str); - - let mut rules_that_apply = vec![]; - for hash in request_entities.iter().chain(request_hostnames.iter()) { - if let Some(specific_rules) = self.specific_rules.retrieve(hash) { - rules_that_apply.extend(specific_rules); - } - } - - let mut exceptions = HostnameExceptionsBuilder::default(); - - rules_that_apply.iter().for_each(|r| { - exceptions.insert_if_exception(r); - }); - - let rules_that_apply = rules_that_apply - .iter() - .map(|r| r.to_owned()) - .filter(|r| exceptions.allow_specific_rule(r)) - .collect::>(); - - let (hostname_hide_selectors, style_selectors, script_injections) = - hostname_specific_rules(&rules_that_apply[..]); - - let hide_selectors = if generichide { - hostname_hide_selectors - } else { - let mut hide_selectors = self - .misc_generic_selectors - .difference(&exceptions.hide_exceptions) - .cloned() - .collect::>(); - hostname_hide_selectors.into_iter().for_each(|sel| { - hide_selectors.insert(sel); - }); - hide_selectors - }; - - let mut injected_script = String::new(); - script_injections.iter().for_each(|s| { - if let Ok(filled_template) = self.scriptlets.get_scriptlet(s) { - injected_script += "try {\n"; - injected_script += &filled_template; - injected_script += "\n} catch ( e ) { }\n"; - } - }); - - UrlSpecificResources { - hide_selectors, - style_selectors, - exceptions: exceptions.hide_exceptions, - injected_script, - generichide, - } - } - - /// Sets the internal resources to be those provided, silently discarding errors. - /// - /// Use `add_resource` if error information is required. 
- pub fn use_resources(&mut self, resources: &[Resource]) { - let mut scriptlets = ScriptletResourceStorage::default(); - - resources.iter().for_each(|resource| { - let _result = scriptlets.add_resource(resource); - }); - - self.scriptlets = scriptlets; - } - - /// Adds a single scriptlet resource. - pub fn add_resource( - &mut self, - resource: &Resource, - ) -> Result<(), crate::resources::AddResourceError> { - self.scriptlets.add_resource(resource) - } -} - -/// Used internally to build hostname-specific rulesets by canceling out rules which match any -/// exceptions -#[derive(Default, Debug, PartialEq, Eq)] -struct HostnameExceptionsBuilder { - hide_exceptions: HashSet, - style_exceptions: HashSet<(String, String)>, - script_inject_exceptions: HashSet, -} - -impl HostnameExceptionsBuilder { - /// Saves the given rule if it's an exception, or ignores it otherwise. - pub fn insert_if_exception(&mut self, rule: &SpecificFilterType) { - use SpecificFilterType as Rule; - - match rule { - Rule::Hide(_) | Rule::Style(_, _) | Rule::ScriptInject(_) => (), - Rule::Unhide(sel) => { - self.hide_exceptions.insert(sel.clone()); - } - Rule::UnhideStyle(sel, style) => { - self.style_exceptions.insert((sel.clone(), style.clone())); - } - Rule::UnhideScriptInject(script) => { - self.script_inject_exceptions.insert(script.clone()); - } - } - } - - /// A generic selector is allowed if it is not excepted by this set of exceptions. - pub fn allow_generic_selector(&self, selector: &str) -> bool { - !self.hide_exceptions.contains(selector) - } - - /// Specific rules are allowed if they can be used to hide, restyle, or inject a script in the - /// context of this set of exceptions - i.e. if the rule itself is not an exception rule and - /// doesn't have a corresponding exception rule added previously. 
- pub fn allow_specific_rule(&self, rule: &SpecificFilterType) -> bool { - match rule { - SpecificFilterType::Hide(sel) => !self.hide_exceptions.contains(sel), - SpecificFilterType::Style(sel, style) => !self - .style_exceptions - .contains(&(sel.to_string(), style.to_string())), - SpecificFilterType::ScriptInject(sel) => !self.script_inject_exceptions.contains(sel), - _ => false, - } - } -} - -/// Each hostname-specific filter can be pointed to by several different hostnames, and each -/// hostname can correspond to several different filters. To effectively store and access those -/// filters by hostname, all the non-hostname information for filters is stored in per-hostname -/// "buckets" within a Vec, and each bucket is identified by its index. Hostname hashes are used as -/// keys to get the indices of relevant buckets, which are in turn used to retrieve all the filters -/// that apply. -#[derive(Deserialize, Serialize, Default)] -pub(crate) struct HostnameRuleDb { - #[serde(serialize_with = "crate::data_format::utils::stabilize_hashmap_serialization")] - db: HashMap>, -} - -impl HostnameRuleDb { - pub fn new() -> Self { - HostnameRuleDb { db: HashMap::new() } - } - - pub fn store_rule(&mut self, rule: CosmeticFilter) { - let kind = SpecificFilterType::from(&rule); - - if let Some(hostnames) = rule.hostnames { - hostnames.iter().for_each(|h| self.store(h, kind.clone())); - } - if let Some(entities) = rule.entities { - entities.iter().for_each(|e| self.store(e, kind.clone())); - } - - let kind = kind.negated(); - - if let Some(not_hostnames) = rule.not_hostnames { - not_hostnames - .iter() - .for_each(|h| self.store(h, kind.clone())); - } - if let Some(not_entities) = rule.not_entities { - not_entities - .iter() - .for_each(|e| self.store(e, kind.clone())); - } - } - - fn store(&mut self, hostname: &Hash, kind: SpecificFilterType) { - if let Some(bucket) = self.db.get_mut(hostname) { - bucket.push(kind); - } else { - self.db.insert(*hostname, vec![kind]); - } - 
} - - pub fn retrieve<'a>(&'a self, hostname: &Hash) -> Option<&'a [SpecificFilterType]> { - if let Some(bucket) = self.db.get(hostname) { - Some(bucket) - } else { - None - } - } -} - -/// Each variant describes a single rule that is specific to a particular hostname. -#[derive(Clone, Debug, Deserialize, Serialize)] -pub enum SpecificFilterType { - /// A simple hostname-specific hide rule, e.g. `example.com##.ad`. - /// - /// The parameter is the rule's CSS selector. - Hide(String), - /// A simple hostname-specific hide exception rule, e.g. `example.com#@#.ad`. - /// - /// The parameter is the rule's CSS selector. - Unhide(String), - - /// A hostname-specific rule with a custom style for an element, e.g. - /// `example.com##.ad:style(margin: 0)`. - /// - /// The parameters are the rule's selector and its additional style. - Style(String, String), - /// A hostname-specific exception rule for a custom style for an element, e.g. - /// `example.com#@#.ad:style(margin: 0)`. - /// - /// The parameters are the rule's selector and its additional style. - /// - /// In practice, this kind of rule does not appear in filter lists, although it is not - /// explicitly forbidden according to any syntax documentation. - UnhideStyle(String, String), - - /// A hostname-specific rule with a scriptlet to inject along with any arguments, e.g. - /// `example.com##+js(acis, Number.isNan)`. - /// - /// The parameter is the contents of the `+js(...)` syntax construct. - ScriptInject(String), - /// A hostname-specific rule to except a scriptlet to inject along with any arguments, e.g. - /// `example.com#@#+js(acis, Number.isNan)`. - /// - /// The parameter is the contents of the `+js(...)` syntax construct. - /// - /// In practice, these rules are extremely rare in filter lists. 
- UnhideScriptInject(String), -} - -/// This implementation assumes the given rule has hostname or entity constraints, and that the -/// appropriate 'hidden' generic rule has already been applied externally if necessary. -impl From<&CosmeticFilter> for SpecificFilterType { - fn from(rule: &CosmeticFilter) -> Self { - let unhide = rule.mask.contains(CosmeticFilterMask::UNHIDE); - - if let Some(ref style) = rule.style { - if unhide { - SpecificFilterType::UnhideStyle(rule.selector.clone(), style.clone()) - } else { - SpecificFilterType::Style(rule.selector.clone(), style.clone()) - } - } else if rule.mask.contains(CosmeticFilterMask::SCRIPT_INJECT) { - if unhide { - SpecificFilterType::UnhideScriptInject(rule.selector.clone()) - } else { - SpecificFilterType::ScriptInject(rule.selector.clone()) - } - } else { - if unhide { - SpecificFilterType::Unhide(rule.selector.clone()) - } else { - SpecificFilterType::Hide(rule.selector.clone()) - } - } - } -} - -impl SpecificFilterType { - pub fn negated(self) -> Self { - match self { - SpecificFilterType::Hide(sel) => SpecificFilterType::Unhide(sel), - SpecificFilterType::Unhide(sel) => SpecificFilterType::Hide(sel), - SpecificFilterType::Style(sel, style) => SpecificFilterType::UnhideStyle(sel, style), - SpecificFilterType::UnhideStyle(sel, style) => SpecificFilterType::Style(sel, style), - SpecificFilterType::ScriptInject(script) => { - SpecificFilterType::UnhideScriptInject(script) - } - SpecificFilterType::UnhideScriptInject(script) => { - SpecificFilterType::ScriptInject(script) - } - } - } -} - -fn hostname_domain_hashes(hostname: &str, domain: &str) -> (Vec, Vec) { - let request_entities = - crate::filters::cosmetic::get_entity_hashes_from_labels(hostname, domain); - let request_hostnames = - crate::filters::cosmetic::get_hostname_hashes_from_labels(hostname, domain); - - (request_entities, request_hostnames) -} - -#[cfg(test)] -mod cosmetic_cache_tests { - use super::*; - - fn cache_from_rules(rules: Vec<&str>) -> 
CosmeticFilterCache { - let parsed_rules = rules - .iter() - .map(|r| CosmeticFilter::parse(r, false).unwrap()) - .collect::>(); - - CosmeticFilterCache::from_rules(parsed_rules) - } - - #[test] - fn exceptions() { - let cfcache = cache_from_rules(vec!["~example.com##.item", "sub.example.com#@#.item2"]); - - let out = cfcache.hostname_cosmetic_resources("test.com", false); - let mut expected = UrlSpecificResources::empty(); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources("example.com", false); - expected.exceptions.insert(".item".into()); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources("sub.example.com", false); - expected.exceptions.insert(".item2".into()); - assert_eq!(out, expected); - } - - #[test] - fn exceptions2() { - let cfcache = cache_from_rules(vec!["example.com,~sub.example.com##.item"]); - - let out = cfcache.hostname_cosmetic_resources("test.com", false); - let mut expected = UrlSpecificResources::empty(); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources("example.com", false); - expected.hide_selectors.insert(".item".to_owned()); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources("sub.example.com", false); - let mut expected = UrlSpecificResources::empty(); - expected.exceptions.insert(".item".into()); - assert_eq!(out, expected); - } - - #[test] - fn style_exceptions() { - let cfcache = cache_from_rules(vec![ - "example.com,~sub.example.com##.element:style(background: #fff)", - "sub.test.example.com#@#.element:style(background: #fff)", - "a1.sub.example.com##.element", - "a2.sub.example.com##.element:style(background: #000)", - ]); - - let out = cfcache.hostname_cosmetic_resources("sub.example.com", false); - let mut expected = UrlSpecificResources::empty(); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources("sub.test.example.com", false); - assert_eq!(out, expected); - - let out = 
cfcache.hostname_cosmetic_resources("a1.sub.example.com", false); - expected.hide_selectors.insert(".element".to_owned()); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources("test.example.com", false); - expected.hide_selectors.clear(); - expected - .style_selectors - .insert(".element".to_owned(), vec!["background: #fff".to_owned()]); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources("a2.sub.example.com", false); - expected.style_selectors.clear(); - expected - .style_selectors - .insert(".element".to_owned(), vec!["background: #000".to_owned()]); - assert_eq!(out, expected); - } - - #[test] - fn script_exceptions() { - use crate::resources::{MimeType, ResourceType}; - - let mut cfcache = cache_from_rules(vec![ - "example.com,~sub.example.com##+js(set-constant.js, atob, trueFunc)", - "sub.test.example.com#@#+js(set-constant.js, atob, trueFunc)", - "cosmetic.net##+js(nowebrtc.js)", - "g.cosmetic.net##+js(window.open-defuser.js)", - "c.g.cosmetic.net#@#+js(nowebrtc.js)", - ]); - - cfcache.use_resources(&[ - Resource { - name: "set-constant.js".into(), - aliases: vec![], - kind: ResourceType::Template, - content: base64::encode("set-constant.js, {{1}}, {{2}}"), - }, - Resource { - name: "nowebrtc.js".into(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("nowebrtc.js"), - }, - Resource { - name: "window.open-defuser.js".into(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("window.open-defuser.js"), - }, - ]); - - let out = cfcache.hostname_cosmetic_resources("sub.example.com", false); - let mut expected = UrlSpecificResources::empty(); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources("sub.test.example.com", false); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources("test.example.com", false); - expected.injected_script = - "try 
{\nset-constant.js, atob, trueFunc\n} catch ( e ) { }\n".to_owned(); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources("cosmetic.net", false); - expected.injected_script = "try {\nnowebrtc.js\n} catch ( e ) { }\n".to_owned(); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources("g.cosmetic.net", false); - expected.injected_script = "try {\nnowebrtc.js\n} catch ( e ) { }\ntry {\nwindow.open-defuser.js\n} catch ( e ) { }\n".to_owned(); - assert_eq!(out, expected); - - let out = cfcache.hostname_cosmetic_resources("c.g.cosmetic.net", false); - expected.injected_script = "try {\nwindow.open-defuser.js\n} catch ( e ) { }\n".to_owned(); - assert_eq!(out, expected); - } - - #[test] - fn matching_hidden_class_id_selectors() { - let rules = [ - "##.a-class", - "###simple-id", - "##.a-class .with .children", - "##.children .including #simple-id", - "##a.a-class", - ]; - let cfcache = CosmeticFilterCache::from_rules( - rules - .iter() - .map(|r| CosmeticFilter::parse(r, false).unwrap()) - .collect::>(), - ); - - let out = cfcache.hidden_class_id_selectors(&["with".into()], &[], &HashSet::default()); - assert_eq!(out, Vec::::new()); - - let out = cfcache.hidden_class_id_selectors(&[], &["with".into()], &HashSet::default()); - assert_eq!(out, Vec::::new()); - - let out = cfcache.hidden_class_id_selectors(&[], &["a-class".into()], &HashSet::default()); - assert_eq!(out, Vec::::new()); - - let out = - cfcache.hidden_class_id_selectors(&["simple-id".into()], &[], &HashSet::default()); - assert_eq!(out, Vec::::new()); - - let out = cfcache.hidden_class_id_selectors(&["a-class".into()], &[], &HashSet::default()); - assert_eq!(out, [".a-class", ".a-class .with .children"]); - - let out = cfcache.hidden_class_id_selectors( - &["children".into(), "a-class".into()], - &[], - &HashSet::default(), - ); - assert_eq!( - out, - [ - ".a-class", - ".children .including #simple-id", - ".a-class .with .children" - ] - ); - - let out = - 
cfcache.hidden_class_id_selectors(&[], &["simple-id".into()], &HashSet::default()); - assert_eq!(out, ["#simple-id"]); - - let out = cfcache.hidden_class_id_selectors( - &["children".into(), "a-class".into()], - &["simple-id".into()], - &HashSet::default(), - ); - assert_eq!( - out, - [ - ".a-class", - "#simple-id", - ".children .including #simple-id", - ".a-class .with .children" - ] - ); - } - - #[test] - fn class_id_exceptions() { - let rules = vec![ - "##.a-class", - "###simple-id", - "##.a-class .with .children", - "##.children .including #simple-id", - "##a.a-class", - "example.*#@#.a-class", - "~test.com###test-element", - ]; - let cfcache = CosmeticFilterCache::from_rules( - rules - .iter() - .map(|r| CosmeticFilter::parse(r, false).unwrap()) - .collect::>(), - ); - let exceptions = cfcache - .hostname_cosmetic_resources("example.co.uk", false) - .exceptions; - - let out = cfcache.hidden_class_id_selectors(&["a-class".into()], &[], &exceptions); - assert_eq!(out, [".a-class .with .children"]); - - let out = cfcache.hidden_class_id_selectors( - &["children".into(), "a-class".into()], - &["simple-id".into()], - &exceptions, - ); - assert_eq!( - out, - [ - "#simple-id", - ".children .including #simple-id", - ".a-class .with .children" - ] - ); - - let out = cfcache.hidden_class_id_selectors(&[], &["test-element".into()], &exceptions); - assert_eq!(out, ["#test-element"]); - - let exceptions = cfcache - .hostname_cosmetic_resources("a1.test.com", false) - .exceptions; - - let out = cfcache.hidden_class_id_selectors(&["a-class".into()], &[], &exceptions); - assert_eq!(out, [".a-class", ".a-class .with .children"]); - - let out = cfcache.hidden_class_id_selectors( - &["children".into(), "a-class".into()], - &["simple-id".into()], - &exceptions, - ); - assert_eq!( - out, - [ - ".a-class", - "#simple-id", - ".children .including #simple-id", - ".a-class .with .children" - ] - ); - - let out = cfcache.hidden_class_id_selectors(&[], &["test-element".into()], 
&exceptions); - assert_eq!(out, Vec::::new()); - } - - #[test] - fn misc_generic_exceptions() { - let rules = vec![ - "##a[href=\"bad.com\"]", - "##div > p", - "##a[href=\"notbad.com\"]", - "example.com#@#div > p", - "~example.com##a[href=\"notbad.com\"]", - ]; - let cfcache = CosmeticFilterCache::from_rules( - rules - .iter() - .map(|r| CosmeticFilter::parse(r, false).unwrap()) - .collect::>(), - ); - - let hide_selectors = cfcache - .hostname_cosmetic_resources("test.com", false) - .hide_selectors; - let mut expected_hides = HashSet::new(); - expected_hides.insert("a[href=\"bad.com\"]".to_owned()); - expected_hides.insert("div > p".to_owned()); - expected_hides.insert("a[href=\"notbad.com\"]".to_owned()); - assert_eq!(hide_selectors, expected_hides); - - let hide_selectors = cfcache - .hostname_cosmetic_resources("example.com", false) - .hide_selectors; - let mut expected_hides = HashSet::new(); - expected_hides.insert("a[href=\"bad.com\"]".to_owned()); - assert_eq!(hide_selectors, expected_hides); - } - - #[test] - fn apply_to_tld() { - use crate::resources::ResourceType; - - // toolforge.org and github.io are examples of TLDs with multiple segments. These rules - // should still be parsed correctly and applied on corresponding subdomains. 
- let rules = vec![ - "toolforge.org##+js(abort-on-property-read, noAdBlockers)", - "github.io##div.adToBlock", - ]; - let mut cfcache = CosmeticFilterCache::from_rules( - rules - .iter() - .map(|r| CosmeticFilter::parse(r, false).unwrap()) - .collect::>(), - ); - cfcache.use_resources(&[Resource { - name: "abort-on-property-read.js".into(), - aliases: vec!["aopr".to_string()], - kind: ResourceType::Template, - content: base64::encode("abort-on-property-read.js, {{1}}"), - }]); - - let injected_script = cfcache - .hostname_cosmetic_resources("antonok.toolforge.org", false) - .injected_script; - assert_eq!( - injected_script, - "try {\nabort-on-property-read.js, noAdBlockers\n} catch ( e ) { }\n" - ); - - let hide_selectors = cfcache - .hostname_cosmetic_resources("antonok.github.io", false) - .hide_selectors; - let mut expected_hides = HashSet::new(); - expected_hides.insert("div.adToBlock".to_owned()); - assert_eq!(hide_selectors, expected_hides); - } -} diff --git a/third_party/rust/adblock/v0_7/crate/src/data_format/legacy.rs b/third_party/rust/adblock/v0_7/crate/src/data_format/legacy.rs deleted file mode 100644 index 254e131d2dc6..000000000000 --- a/third_party/rust/adblock/v0_7/crate/src/data_format/legacy.rs +++ /dev/null @@ -1,404 +0,0 @@ -//! Contains representations of data from the adblocking engine in a -//! forwards-and-backwards-compatible format, as well as utilities for converting these to and from -//! the actual `Engine` components. -//! -//! The format itself is split into two parts for historical reasons. Any new fields should be -//! added to the _end_ of both `SerializeFormatRest` and `DeserializeFormatRest`. -//! -//! This particular data format is space-inefficient, has several unused fields, prevents some -//! dependency updates, and the lack of a version field makes upgrades difficult. It will be -//! removed in a future release. 
- -use std::collections::{HashMap, HashSet}; - -use flate2::read::GzDecoder; -use flate2::write::GzEncoder; -use flate2::Compression; -use rmp_serde_legacy as rmps; -use serde::{Deserialize, Serialize}; - -use crate::blocker::{Blocker, NetworkFilterList}; -use crate::cosmetic_filter_cache::{CosmeticFilterCache, HostnameRuleDb}; -use crate::filters::network::NetworkFilter; -use crate::resources::{RedirectResourceStorage, ScriptletResourceStorage}; -use crate::utils::is_eof_error; - -use super::{DeserializationError, SerializationError}; - -/// `_fuzzy_signature` is no longer used, and is removed from future format versions. -#[derive(Debug, Clone, Serialize)] -struct NetworkFilterLegacySerializeFmt<'a> { - mask: &'a crate::filters::network::NetworkFilterMask, - filter: &'a crate::filters::network::FilterPart, - opt_domains: &'a Option>, - opt_not_domains: &'a Option>, - redirect: &'a Option, - hostname: &'a Option, - csp: &'a Option, - bug: Option, - tag: &'a Option, - raw_line: Option, - id: &'a crate::utils::Hash, - _fuzzy_signature: Option>, - opt_domains_union: &'a Option, - opt_not_domains_union: &'a Option, -} - -/// Generic over `Borrow` because `tagged_filters_all` requires `&'a NetworkFilter` -/// while `NetworkFilterList` requires `&'a Arc`. 
-impl<'a, T> From<&'a T> for NetworkFilterLegacySerializeFmt<'a> -where - T: std::borrow::Borrow, -{ - fn from(v: &'a T) -> NetworkFilterLegacySerializeFmt<'a> { - let v = v.borrow(); - NetworkFilterLegacySerializeFmt { - mask: &v.mask, - filter: &v.filter, - opt_domains: &v.opt_domains, - opt_not_domains: &v.opt_not_domains, - redirect: if v.is_redirect() { - &v.modifier_option - } else { - &None - }, - hostname: &v.hostname, - csp: if v.is_csp() { - &v.modifier_option - } else { - &None - }, - bug: None, - tag: &v.tag, - raw_line: v.raw_line.as_ref().map(|raw| *raw.clone()), - id: &v.id, - _fuzzy_signature: None, - opt_domains_union: &v.opt_domains_union, - opt_not_domains_union: &v.opt_not_domains_union, - } - } -} - -/// Forces a `NetworkFilterList` to be serialized with the legacy filter format by converting to an -/// intermediate representation that is constructed with `NetworkFilterLegacyFmt` instead. -fn serialize_legacy_network_filter_list( - list: &NetworkFilterList, - s: S, -) -> Result -where - S: serde::Serializer, -{ - #[derive(Serialize, Default)] - struct NetworkFilterListLegacySerializeFmt<'a> { - #[serde(serialize_with = "crate::data_format::utils::stabilize_hashmap_serialization")] - filter_map: HashMap>>, - } - - let legacy_list = NetworkFilterListLegacySerializeFmt { - filter_map: list - .filter_map - .iter() - .map(|(k, v)| (*k, v.iter().map(|f| f.into()).collect())) - .collect(), - }; - - legacy_list.serialize(s) -} - -/// Forces a `NetworkFilter` slice to be serialized with the legacy filter format by converting to -/// an intermediate representation that is constructed with `NetworkFilterLegacyFmt` instead. 
-fn serialize_legacy_network_filter_vec(vec: &[NetworkFilter], s: S) -> Result -where - S: serde::Serializer, -{ - let legacy_vec: Vec<_> = vec - .iter() - .map(NetworkFilterLegacySerializeFmt::from) - .collect(); - - legacy_vec.serialize(s) -} - -/// Provides structural aggregration of referenced adblock engine data to allow for allocation-free -/// serialization. -/// -/// Note that this does not implement `Serialize` directly, as it is composed of two parts which -/// must be serialized independently. Instead, use the `serialize` method. -pub struct SerializeFormat<'a> { - part1: SerializeFormatPt1<'a>, - rest: SerializeFormatRest<'a>, -} - -impl<'a> SerializeFormat<'a> { - pub fn serialize(&self) -> Result, SerializationError> { - let mut gz = GzEncoder::new(Vec::new(), Compression::default()); - rmps::encode::write(&mut gz, &self.part1)?; - rmps::encode::write(&mut gz, &self.rest)?; - let compressed = gz.finish()?; - Ok(compressed) - } -} - -#[derive(Serialize)] -struct SerializeFormatPt1<'a> { - #[serde(serialize_with = "serialize_legacy_network_filter_list")] - csp: &'a NetworkFilterList, - #[serde(serialize_with = "serialize_legacy_network_filter_list")] - exceptions: &'a NetworkFilterList, - #[serde(serialize_with = "serialize_legacy_network_filter_list")] - importants: &'a NetworkFilterList, - #[serde(serialize_with = "serialize_legacy_network_filter_list")] - redirects: &'a NetworkFilterList, - #[serde(serialize_with = "serialize_legacy_network_filter_list")] - filters_tagged: &'a NetworkFilterList, - #[serde(serialize_with = "serialize_legacy_network_filter_list")] - filters: &'a NetworkFilterList, - - #[serde(serialize_with = "serialize_legacy_network_filter_vec")] - tagged_filters_all: &'a Vec, - - _debug: bool, - enable_optimizations: bool, - - // This field exists for backwards compatibility only. - _unused: bool, - // This field exists for backwards compatibility only, and *must* be true. 
- _unused2: bool, - - resources: &'a RedirectResourceStorage, -} - -#[derive(Serialize)] -struct SerializeFormatRest<'a> { - simple_class_rules: &'a HashSet, - simple_id_rules: &'a HashSet, - complex_class_rules: &'a HashMap>, - complex_id_rules: &'a HashMap>, - - specific_rules: &'a HostnameRuleDb, - - misc_generic_selectors: &'a HashSet, - - scriptlets: &'a ScriptletResourceStorage, - - #[serde(serialize_with = "serialize_legacy_network_filter_list")] - generic_hide: &'a NetworkFilterList, -} - -/// `_fuzzy_signature` is no longer used, and is cleaned up from future format versions. -#[derive(Debug, Clone, Deserialize)] -pub(crate) struct NetworkFilterLegacyDeserializeFmt { - pub mask: crate::filters::network::NetworkFilterMask, - pub filter: crate::filters::network::FilterPart, - pub opt_domains: Option>, - pub opt_not_domains: Option>, - pub redirect: Option, - pub hostname: Option, - pub csp: Option, - pub bug: Option, - pub tag: Option, - pub raw_line: Option, - pub id: crate::utils::Hash, - _fuzzy_signature: Option>, - pub opt_domains_union: Option, - pub opt_not_domains_union: Option, -} - -impl From for NetworkFilter { - fn from(v: NetworkFilterLegacyDeserializeFmt) -> Self { - Self { - mask: v.mask, - filter: v.filter, - opt_domains: v.opt_domains, - opt_not_domains: v.opt_not_domains, - modifier_option: v.redirect.or(v.csp), - hostname: v.hostname, - tag: v.tag, - raw_line: v.raw_line.map(Box::new), - id: v.id, - opt_domains_union: v.opt_domains_union, - opt_not_domains_union: v.opt_not_domains_union, - } - } -} - -#[derive(Debug, Deserialize, Default)] -pub(crate) struct NetworkFilterListLegacyDeserializeFmt { - pub filter_map: HashMap>, -} - -impl From for NetworkFilterList { - fn from(v: NetworkFilterListLegacyDeserializeFmt) -> Self { - Self { - filter_map: v - .filter_map - .into_iter() - .map(|(k, v)| { - ( - k, - v.into_iter() - .map(|f| std::sync::Arc::new(f.into())) - .collect(), - ) - }) - .collect(), - } - } -} - -/// Structural representation 
of adblock engine data that can be built up from deserialization and -/// used directly to construct new `Engine` components without unnecessary allocation. -/// -/// Note that this does not implement `Deserialize` directly, as it is composed of two parts which -/// must be deserialized independently. Instead, use the `deserialize` method. -pub struct DeserializeFormat { - part1: DeserializeFormatPart1, - rest: DeserializeFormatRest, -} - -impl DeserializeFormat { - pub fn deserialize(serialized: &[u8]) -> Result { - let mut gz = GzDecoder::new(serialized); - let part1: DeserializeFormatPart1 = rmps::decode::from_read(&mut gz)?; - let rest = match rmps::decode::from_read(&mut gz) { - Ok(rest) => rest, - Err(ref e) if is_eof_error(e) => Default::default(), - Err(e) => return Err(e.into()), - }; - Ok(Self { part1, rest }) - } -} - -#[derive(Deserialize)] -struct DeserializeFormatPart1 { - csp: NetworkFilterListLegacyDeserializeFmt, - exceptions: NetworkFilterListLegacyDeserializeFmt, - importants: NetworkFilterListLegacyDeserializeFmt, - redirects: NetworkFilterListLegacyDeserializeFmt, - filters_tagged: NetworkFilterListLegacyDeserializeFmt, - filters: NetworkFilterListLegacyDeserializeFmt, - - tagged_filters_all: Vec, - - debug: bool, - enable_optimizations: bool, - - // This field exists for backwards compatibility only. - _unused: bool, - // This field exists for backwards compatibility only, and *must* be true. - _unused2: bool, - - #[serde(default)] - resources: RedirectResourceStorage, -} - -/// Any fields added to this must include the `#[serde(default)]` annotation, or another serde -/// annotation that will allow the format to gracefully handle missing fields when deserializing -/// from older versions of the format. 
-#[derive(Deserialize, Default)] -struct DeserializeFormatRest { - #[serde(default)] - simple_class_rules: HashSet, - #[serde(default)] - simple_id_rules: HashSet, - #[serde(default)] - complex_class_rules: HashMap>, - #[serde(default)] - complex_id_rules: HashMap>, - - #[serde(default)] - specific_rules: HostnameRuleDb, - - #[serde(default)] - misc_generic_selectors: HashSet, - - #[serde(default)] - scriptlets: ScriptletResourceStorage, - - #[serde(default)] - generic_hide: NetworkFilterListLegacyDeserializeFmt, -} - -impl<'a> From<(&'a Blocker, &'a CosmeticFilterCache)> for SerializeFormat<'a> { - fn from(v: (&'a Blocker, &'a CosmeticFilterCache)) -> Self { - let (blocker, cfc) = v; - Self { - part1: SerializeFormatPt1 { - csp: &blocker.csp, - exceptions: &blocker.exceptions, - importants: &blocker.importants, - redirects: &blocker.redirects, - filters_tagged: &blocker.filters_tagged, - filters: &blocker.filters, - - tagged_filters_all: &blocker.tagged_filters_all, - - _debug: true, - enable_optimizations: blocker.enable_optimizations, - _unused: true, - _unused2: true, - - resources: &blocker.resources, - }, - rest: SerializeFormatRest { - simple_class_rules: &cfc.simple_class_rules, - simple_id_rules: &cfc.simple_id_rules, - complex_class_rules: &cfc.complex_class_rules, - complex_id_rules: &cfc.complex_id_rules, - - specific_rules: &cfc.specific_rules, - - misc_generic_selectors: &cfc.misc_generic_selectors, - - scriptlets: &cfc.scriptlets, - - generic_hide: &blocker.generic_hide, - }, - } - } -} - -impl From for (Blocker, CosmeticFilterCache) { - fn from(v: DeserializeFormat) -> Self { - ( - Blocker { - csp: v.part1.csp.into(), - exceptions: v.part1.exceptions.into(), - importants: v.part1.importants.into(), - redirects: v.part1.redirects.into(), - removeparam: NetworkFilterList::default(), - filters_tagged: v.part1.filters_tagged.into(), - filters: v.part1.filters.into(), - - tags_enabled: Default::default(), - tagged_filters_all: v - .part1 - 
.tagged_filters_all - .into_iter() - .map(|f| f.into()) - .collect(), - - enable_optimizations: v.part1.enable_optimizations, - - resources: v.part1.resources, - #[cfg(feature = "object-pooling")] - pool: Default::default(), - regex_manager: Default::default(), - - generic_hide: v.rest.generic_hide.into(), - }, - CosmeticFilterCache { - simple_class_rules: v.rest.simple_class_rules, - simple_id_rules: v.rest.simple_id_rules, - complex_class_rules: v.rest.complex_class_rules, - complex_id_rules: v.rest.complex_id_rules, - - specific_rules: v.rest.specific_rules, - - misc_generic_selectors: v.rest.misc_generic_selectors, - - scriptlets: v.rest.scriptlets, - }, - ) - } -} diff --git a/third_party/rust/adblock/v0_7/crate/src/engine.rs b/third_party/rust/adblock/v0_7/crate/src/engine.rs deleted file mode 100644 index bdaa26e26991..000000000000 --- a/third_party/rust/adblock/v0_7/crate/src/engine.rs +++ /dev/null @@ -1,891 +0,0 @@ -//! The adblock [`Engine`] is the primary interface for adblocking. - -use crate::blocker::{Blocker, BlockerError, BlockerOptions, BlockerResult}; -use crate::cosmetic_filter_cache::{CosmeticFilterCache, UrlSpecificResources}; -use crate::lists::{FilterSet, ParseOptions}; -use crate::regex_manager::RegexManagerDiscardPolicy; -use crate::request::Request; -use crate::resources::{Resource, RedirectResource}; - -use std::collections::HashSet; - -/// Drives high-level blocking logic and is responsible for loading filter lists into an optimized -/// format that can be queried efficiently. -/// -/// For performance optimization reasons, the [`Engine`] is not designed to have rules added or -/// removed after its initial creation. Making changes to the rules loaded is accomplished by -/// creating a new engine to replace it. -/// -/// ## Usage -/// -/// ### Initialization -/// -/// You'll first want to combine all of your filter lists in a [`FilterSet`], which will parse list -/// header metadata. 
Once all lists have been composed together, you can call -/// [`Engine::from_filter_set`] to start using them for blocking. -/// -/// You may also want to supply certain assets for `$redirect` filters and `##+js(...)` scriptlet -/// injections. These are known as [`Resource`]s, and can be provided with -/// [`Engine::use_resources`]. See the [`crate::resources`] module for more information. -/// -/// ### Network blocking -/// -/// Use the [`Engine::check_network_urls`] method to determine how to handle a network request. -/// -/// If you _only_ need network blocking, consider using a [`Blocker`] directly. -/// -/// ### Cosmetic filtering -/// -/// Call [`Engine::url_cosmetic_resources`] to determine what actions should be taken to prepare a -/// particular page before it starts loading. -/// -/// Once the page has been loaded, any new CSS classes or ids that appear on the page should be passed to -/// [`Engine::hidden_class_id_selectors`] on an ongoing basis to determine additional elements that -/// should be hidden dynamically. -pub struct Engine { - pub blocker: Blocker, - cosmetic_cache: CosmeticFilterCache, -} - -impl Default for Engine { - /// Equivalent to `Engine::new(true)`. - fn default() -> Self { - Self::new(true) - } -} - -pub struct EngineDebugInfo { - pub blocker_debug_info: crate::blocker::BlockerDebugInfo, -} - -impl Engine { - /// Creates a new adblocking `Engine`. `Engine`s created without rules should generally only be - /// used with deserialization. - /// - `optimize` specifies whether or not to attempt to compress the internal representation by - /// combining similar rules. - pub fn new(optimize: bool) -> Self { - let blocker_options = BlockerOptions { - enable_optimizations: optimize, - }; - - Self { - blocker: Blocker::new(vec![], &blocker_options), - cosmetic_cache: CosmeticFilterCache::new(), - } - } - - /// Loads rules in a single format, enabling optimizations and discarding debug information. 
- pub fn from_rules(rules: &[String], opts: ParseOptions) -> Self { - let mut filter_set = FilterSet::new(false); - filter_set.add_filters(rules, opts); - Self::from_filter_set(filter_set, true) - } - - /// Loads rules, enabling optimizations and including debug information. - pub fn from_rules_debug(rules: &[String], opts: ParseOptions) -> Self { - Self::from_rules_parametrised(rules, opts, true, true) - } - - pub fn from_rules_parametrised(filter_rules: &[String], opts: ParseOptions, debug: bool, optimize: bool) -> Self { - let mut filter_set = FilterSet::new(debug); - filter_set.add_filters(filter_rules, opts); - Self::from_filter_set(filter_set, optimize) - } - - /// Loads rules from the given `FilterSet`. It is recommended to use a `FilterSet` when adding - /// rules from multiple sources. - pub fn from_filter_set(set: FilterSet, optimize: bool) -> Self { - let FilterSet { network_filters, cosmetic_filters, .. } = set; - - let blocker_options = BlockerOptions { - enable_optimizations: optimize, - }; - - Self { - blocker: Blocker::new(network_filters, &blocker_options), - cosmetic_cache: CosmeticFilterCache::from_rules(cosmetic_filters), - } - } - - /// Serializes the `Engine` into a binary format so that it can be quickly reloaded later. - pub fn serialize_raw(&self) -> Result, BlockerError> { - use crate::data_format::SerializeFormat; - - let serialize_format = SerializeFormat::build(&self.blocker, &self.cosmetic_cache, false); - - serialize_format.serialize().map_err(|_e| { - BlockerError::SerializationError - }) - } - - /// Serializes the `Engine` into a compressed binary format so that it can be quickly reloaded later. - /// - /// The data format generated from this method is _not_ just a gzip compressed version of - /// `serialize_raw`; it is a distinct format. If you'd like to convert data between the two - /// formats, deserialize it into the `Engine` first, then serialize the appropriate type. 
- /// - /// This method will be removed in a future release. Going forwards, if you'd like to use a - /// compressed binary format, use `serialize_raw` and bring your own compression/decompression. - pub fn serialize_compressed(&self) -> Result, BlockerError> { - use crate::data_format::SerializeFormat; - - let serialize_format = SerializeFormat::build(&self.blocker, &self.cosmetic_cache, true); - - serialize_format.serialize().map_err(|_e| { - BlockerError::SerializationError - }) - } - - /// Deserialize the `Engine` from the binary format generated by `Engine::serialize_compressed` - /// or `Engine::serialize_raw`. The method will automatically select the correct - /// deserialization implementation. - pub fn deserialize(&mut self, serialized: &[u8]) -> Result<(), BlockerError> { - use crate::data_format::DeserializeFormat; - let current_tags = self.blocker.tags_enabled(); - let deserialize_format = DeserializeFormat::deserialize(serialized).map_err(|_e| { - BlockerError::DeserializationError - })?; - let (blocker, cosmetic_cache) = deserialize_format.build(); - self.blocker = blocker; - self.blocker.use_tags(¤t_tags.iter().map(|s| &**s).collect::>()); - self.cosmetic_cache = cosmetic_cache; - Ok(()) - } - - /// Check if a request for a network resource from `url`, of type `request_type`, initiated by - /// `source_url`, should be blocked. 
- pub fn check_network_urls(&self, url: &str, source_url: &str, request_type: &str) -> BlockerResult { - Request::from_urls(url, source_url, request_type) - .map(|request| { - self.blocker.check(&request) - }) - .unwrap_or_else(|_e| { - BlockerResult { - matched: false, - important: false, - redirect: None, - rewritten_url: None, - exception: None, - filter: None, - error: Some("Error parsing request".to_owned()) - } - }) - } - - pub fn check_network_urls_with_hostnames( - &self, - url: &str, - hostname: &str, - source_hostname: &str, - request_type: &str, - third_party_request: Option - ) -> BlockerResult { - let request = Request::from_urls_with_hostname(url, hostname, source_hostname, request_type, third_party_request); - self.blocker.check(&request) - } - - pub fn check_network_urls_with_hostnames_subset( - &self, - url: &str, - hostname: &str, - source_hostname: &str, - request_type: &str, - third_party_request: Option, - previously_matched_rule: bool, - force_check_exceptions: bool, - ) -> BlockerResult { - let request = Request::from_urls_with_hostname(url, hostname, source_hostname, request_type, third_party_request); - self.blocker.check_parameterised(&request, previously_matched_rule, force_check_exceptions) - } - - /// Returns a string containing any additional CSP directives that should be added to this - /// request's response. Only applies to document and subdocument requests. - /// - /// If multiple policies are present from different rules, they will be joined by commas. - pub fn get_csp_directives( - &self, - url: &str, - hostname: &str, - source_hostname: &str, - request_type: &str, - third_party_request: Option, - ) -> Option { - let request = Request::from_urls_with_hostname(url, hostname, source_hostname, request_type, third_party_request); - self.blocker.get_csp_directives(&request) - } - - /// Check if a given filter has been previously added to this `Engine`. - /// - /// Note that only network filters are currently supported by this method. 
- pub fn filter_exists(&self, filter: &str) -> bool { - use crate::filters::network::NetworkFilter; - let filter_parsed = NetworkFilter::parse(filter, false, Default::default()); - match filter_parsed.map(|f| self.blocker.filter_exists(&f)) { - Ok(exists) => exists, - Err(_e) => { - #[cfg(test)] - eprintln!("Encountered unparseable filter when checking for filter existence: {:?}", _e); - false - } - } - } - - /// Sets this engine's tags to be _only_ the ones provided in `tags`. - /// - /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag` - /// option. - pub fn use_tags(&mut self, tags: &[&str]) { - self.blocker.use_tags(tags); - } - - /// Sets this engine's tags to additionally include the ones provided in `tags`. - /// - /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag` - /// option. - pub fn enable_tags(&mut self, tags: &[&str]) { - self.blocker.enable_tags(tags); - } - - /// Sets this engine's tags to no longer include the ones provided in `tags`. - /// - /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag` - /// option. - pub fn disable_tags(&mut self, tags: &[&str]) { - self.blocker.disable_tags(tags); - } - - /// Checks if a given tag exists in this engine. - /// - /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag` - /// option. - pub fn tag_exists(&self, tag: &str) -> bool { - self.blocker.tags_enabled().contains(&tag.to_owned()) - } - - /// Sets this engine's resources to be _only_ the ones provided in `resources`. - pub fn use_resources(&mut self, resources: &[Resource]) { - self.blocker.use_resources(resources); - self.cosmetic_cache.use_resources(resources); - } - - /// Sets this engine's resources to additionally include `resource`. 
- pub fn add_resource(&mut self, resource: Resource) -> Result<(), crate::resources::AddResourceError> { - self.blocker.add_resource(&resource)?; - self.cosmetic_cache.add_resource(&resource)?; - Ok(()) - } - - /// Gets a previously added resource from the engine. - pub fn get_resource(&self, key: &str) -> Option { - self.blocker.get_resource(key).cloned() - } - - // Cosmetic filter functionality - - /// If any of the provided CSS classes or ids could cause a certain generic CSS hide rule - /// (i.e. `{ display: none !important; }`) to be required, this method will return a list of - /// CSS selectors corresponding to rules referencing those classes or ids, provided that the - /// corresponding rules are not excepted. - /// - /// `exceptions` should be passed directly from `UrlSpecificResources`. - pub fn hidden_class_id_selectors(&self, classes: &[String], ids: &[String], exceptions: &HashSet) -> Vec { - self.cosmetic_cache.hidden_class_id_selectors(classes, ids, exceptions) - } - - /// Returns a set of cosmetic filter resources required for a particular url. Once this has - /// been called, all CSS ids and classes on a page should be passed to - /// `hidden_class_id_selectors` to obtain any stylesheets consisting of generic rules (if the - /// returned `generichide` value is false). 
- pub fn url_cosmetic_resources(&self, url: &str) -> UrlSpecificResources { - let request = Request::from_url(url); - if request.is_err() { - return UrlSpecificResources::empty(); - } - let request = request.unwrap(); - - let generichide = self.blocker.check_generic_hide(&request); - self.cosmetic_cache.hostname_cosmetic_resources(&request.hostname, generichide) - } - - pub fn set_regex_discard_policy( - &mut self, - new_discard_policy: RegexManagerDiscardPolicy - ) { - self.blocker.set_regex_discard_policy(new_discard_policy); - } - - #[cfg(feature = "debug-info")] - pub fn discard_regex(&mut self, regex_id: u64) { - self.blocker.discard_regex(regex_id); - } - - #[cfg(feature = "debug-info")] - pub fn get_debug_info(&self) -> EngineDebugInfo { - EngineDebugInfo { blocker_debug_info: self.blocker.get_debug_info() } - } -} - -/// Static assertions for `Engine: Send + Sync` traits. -#[cfg(not(any(feature = "object-pooling", feature = "unsync-regex-caching")))] -fn _assertions() { - fn _assert_send() {} - fn _assert_sync() {} - - _assert_send::(); - _assert_sync::(); -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::resources::{ResourceType, MimeType}; - use crate::lists::FilterFormat; - - #[test] - fn tags_enable_adds_tags() { - let filters = vec![ - String::from("adv$tag=stuff"), - String::from("somelongpath/test$tag=stuff"), - String::from("||brianbondy.com/$tag=brian"), - String::from("||brave.com$tag=brian"), - ]; - let url_results = vec![ - ("http://example.com/advert.html", true), - ("http://example.com/somelongpath/test/2.html", true), - ("https://brianbondy.com/about", true), - ("https://brave.com/about", true), - ]; - - let mut engine = Engine::from_rules(&filters, Default::default()); - engine.enable_tags(&["stuff"]); - engine.enable_tags(&["brian"]); - - url_results.into_iter().for_each(|(url, expected_result)| { - let matched_rule = engine.check_network_urls(&url, "", ""); - if expected_result { - assert!(matched_rule.matched, "Expected match 
for {}", url); - } else { - assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", url, matched_rule.filter); - } - }); - } - - #[test] - fn tags_disable_works() { - let filters = vec![ - String::from("adv$tag=stuff"), - String::from("somelongpath/test$tag=stuff"), - String::from("||brianbondy.com/$tag=brian"), - String::from("||brave.com$tag=brian"), - ]; - let url_results = vec![ - ("http://example.com/advert.html", false), - ("http://example.com/somelongpath/test/2.html", false), - ("https://brianbondy.com/about", true), - ("https://brave.com/about", true), - ]; - - let mut engine = Engine::from_rules(&filters, Default::default()); - engine.enable_tags(&["brian", "stuff"]); - engine.disable_tags(&["stuff"]); - - url_results.into_iter().for_each(|(url, expected_result)| { - let matched_rule = engine.check_network_urls(&url, "", ""); - if expected_result { - assert!(matched_rule.matched, "Expected match for {}", url); - } else { - assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", url, matched_rule.filter); - } - }); - } - - #[test] - fn exception_tags_inactive_by_default() { - let filters = vec![ - String::from("adv"), - String::from("||brianbondy.com/$tag=brian"), - String::from("@@||brianbondy.com/$tag=brian"), - ]; - let url_results = vec![ - ("http://example.com/advert.html", true), - ("https://brianbondy.com/about", false), - ("https://brianbondy.com/advert", true), - ]; - - let engine = Engine::from_rules(&filters, Default::default()); - - url_results.into_iter().for_each(|(url, expected_result)| { - let matched_rule = engine.check_network_urls(&url, "", ""); - if expected_result { - assert!(matched_rule.matched, "Expected match for {}", url); - } else { - assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", url, matched_rule.filter); - } - }); - } - - #[test] - fn exception_tags_works() { - let filters = vec![ - String::from("adv"), - String::from("||brianbondy.com/$tag=brian"), - 
String::from("@@||brianbondy.com/$tag=brian"), - ]; - let url_results = vec![ - ("http://example.com/advert.html", true), - ("https://brianbondy.com/about", false), - ("https://brianbondy.com/advert", false), - ]; - - let mut engine = Engine::from_rules(&filters, Default::default()); - engine.enable_tags(&["brian", "stuff"]); - - url_results.into_iter().for_each(|(url, expected_result)| { - let matched_rule = engine.check_network_urls(&url, "", ""); - if expected_result { - assert!(matched_rule.matched, "Expected match for {}", url); - } else { - assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", url, matched_rule.filter); - } - }); - } - - #[test] - fn serialization_retains_tags() { - let filters = vec![ - String::from("adv$tag=stuff"), - String::from("somelongpath/test$tag=stuff"), - String::from("||brianbondy.com/$tag=brian"), - String::from("||brave.com$tag=brian"), - ]; - let url_results = vec![ - ("http://example.com/advert.html", true), - ("http://example.com/somelongpath/test/2.html", true), - ("https://brianbondy.com/about", false), - ("https://brave.com/about", false), - ]; - - let mut engine = Engine::from_rules(&filters, Default::default()); - engine.enable_tags(&["stuff"]); - engine.enable_tags(&["brian"]); - let serialized = engine.serialize_raw().unwrap(); - let mut deserialized_engine = Engine::default(); - deserialized_engine.enable_tags(&["stuff"]); - deserialized_engine.deserialize(&serialized).unwrap(); - - url_results.into_iter().for_each(|(url, expected_result)| { - let matched_rule = deserialized_engine.check_network_urls(&url, "", ""); - if expected_result { - assert!(matched_rule.matched, "Expected match for {}", url); - } else { - assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", url, matched_rule.filter); - } - }); - } - - #[test] - fn deserialization_backwards_compatible_plain() { - // deserialization_generate_simple(); - // assert!(false); - let serialized: Vec = vec![31, 139, 8, 0, 
0, 0, 0, 0, 0, 255, 1, 68, 0, 187, 255, 155, 145, 128, 145, 128, - 145, 128, 145, 128, 145, 128, 145, 129, 207, 202, 167, 36, 217, 43, 56, 97, 176, 145, 158, 145, 206, 0, 3, - 31, 255, 146, 1, 145, 169, 97, 100, 45, 98, 97, 110, 110, 101, 114, 192, 192, 192, 192, 192, 192, 192, 192, - 207, 186, 136, 69, 13, 115, 187, 170, 226, 192, 192, 192, 144, 194, 195, 194, 195, 207, 77, 26, 78, 68, 0, - 0, 0]; - - let mut deserialized_engine = Engine::default(); - deserialized_engine.deserialize(&serialized).unwrap(); - - let url = "http://example.com/ad-banner.gif"; - let matched_rule = deserialized_engine.check_network_urls(url, "", ""); - assert!(matched_rule.matched, "Expected match for {}", url); - } - - #[test] - fn deserialization_backwards_compatible_tags() { - // deserialization_generate_tags(); - // assert!(false); - let serialized: Vec = vec![31, 139, 8, 0, 0, 0, 0, 0, 0, 255, 149, 139, 49, 14, 64, 48, 24, 70, 137, 131, 88, - 108, 98, 148, 184, 135, 19, 252, 197, 218, 132, 3, 8, 139, 85, 126, 171, 132, 193, 32, 54, 71, 104, 218, 205, - 160, 139, 197, 105, 218, 166, 233, 5, 250, 125, 219, 203, 123, 43, 14, 238, 163, 124, 206, 228, 79, 11, 184, - 113, 195, 55, 136, 98, 181, 132, 120, 65, 157, 17, 160, 180, 233, 152, 221, 1, 164, 98, 178, 255, 242, 178, - 221, 231, 201, 0, 19, 122, 216, 92, 112, 161, 1, 58, 213, 199, 143, 114, 0, 0, 0]; - let mut deserialized_engine = Engine::default(); - - deserialized_engine.enable_tags(&[]); - deserialized_engine.deserialize(&serialized).unwrap(); - let url = "http://example.com/ad-banner.gif"; - let matched_rule = deserialized_engine.check_network_urls(url, "", ""); - assert!(!matched_rule.matched, "Expected NO match for {}", url); - - deserialized_engine.enable_tags(&["abc"]); - deserialized_engine.deserialize(&serialized).unwrap(); - - let url = "http://example.com/ad-banner.gif"; - let matched_rule = deserialized_engine.check_network_urls(url, "", ""); - assert!(matched_rule.matched, "Expected match for {}", url); - } - - 
#[test] - fn deserialization_backwards_compatible_resources() { - // deserialization_generate_resources(); - // assert!(false); - let serialized: Vec = vec![31, 139, 8, 0, 0, 0, 0, 0, 0, 255, 61, 139, 189, 10, 64, 80, 28, 197, 201, 46, - 229, 1, 44, 54, 201, 234, 117, 174, 143, 65, 233, 18, 6, 35, 118, 229, 127, 103, 201, 230, 99, 146, 39, - 184, 177, 25, 152, 61, 13, 238, 29, 156, 83, 167, 211, 175, 115, 90, 40, 184, 203, 235, 24, 244, 219, 176, - 209, 2, 29, 156, 130, 164, 61, 68, 132, 9, 121, 166, 131, 48, 246, 19, 74, 71, 28, 69, 113, 230, 231, 25, - 101, 186, 42, 121, 86, 73, 189, 42, 95, 103, 255, 102, 219, 183, 29, 170, 127, 68, 102, 150, 86, 28, 162, - 0, 247, 3, 163, 110, 154, 146, 145, 195, 175, 245, 47, 101, 250, 113, 201, 119, 0, 0, 0]; - - let mut deserialized_engine = Engine::default(); - deserialized_engine.deserialize(&serialized).unwrap(); - - let url = "http://example.com/ad-banner.gif"; - let matched_rule = deserialized_engine.check_network_urls(url, "", ""); - // This serialized DAT was generated prior to - // https://github.com/brave/adblock-rust/pull/185, so the `redirect` filter did not get - // duplicated into the list of blocking filters. - // - // TODO - The failure to match here is considered acceptable for now, as it's part of a - // breaking change (minor version bump). However, the test should be updated at some point. 
- //assert!(matched_rule.matched, "Expected match for {}", url); - assert_eq!(matched_rule.redirect, Some("data:text/plain;base64,".to_owned()), "Expected redirect to contain resource"); - } - - #[test] - fn deserialization_generate_simple() { - let mut engine = Engine::from_rules(&[ - "ad-banner".to_owned() - ], Default::default()); - let serialized = engine.serialize_compressed().unwrap(); - println!("Engine serialized: {:?}", serialized); - engine.deserialize(&serialized).unwrap(); - } - - #[test] - fn deserialization_generate_tags() { - let mut engine = Engine::from_rules(&[ - "ad-banner$tag=abc".to_owned() - ], Default::default()); - engine.use_tags(&["abc"]); - let serialized = engine.serialize_compressed().unwrap(); - println!("Engine serialized: {:?}", serialized); - engine.deserialize(&serialized).unwrap(); - } - - #[test] - fn deserialization_generate_resources() { - let mut engine = Engine::from_rules(&[ - "ad-banner$redirect=nooptext".to_owned() - ], Default::default()); - - let resources = vec![ - Resource { - name: "nooptext".to_string(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::TextPlain), - content: base64::encode(""), - }, - Resource { - name: "noopcss".to_string(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::TextPlain), - content: base64::encode(""), - }, - ]; - engine.use_resources(&resources); - - let serialized = engine.serialize_compressed().unwrap(); - println!("Engine serialized: {:?}", serialized); - engine.deserialize(&serialized).unwrap(); - } - - #[test] - fn redirect_resource_insertion_works() { - let mut engine = Engine::from_rules(&[ - "ad-banner$redirect=nooptext".to_owned() - ], Default::default()); - - engine.add_resource(Resource { - name: "nooptext".to_owned(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::TextPlain), - content: "".to_owned(), - }).unwrap(); - - let url = "http://example.com/ad-banner.gif"; - let matched_rule = engine.check_network_urls(url, "", ""); - 
assert!(matched_rule.matched, "Expected match for {}", url); - assert_eq!(matched_rule.redirect, Some("data:text/plain;base64,".to_owned()), "Expected redirect to contain resource"); - } - - #[test] - fn redirect_resource_lookup_works() { - let script = base64::encode(r#" -(function() { - ; -})(); - - "#); - - let mut engine = Engine::default(); - - engine.add_resource(Resource { - name: "noopjs".to_owned(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: script.to_owned(), - }).unwrap(); - let inserted_resource = engine.get_resource("noopjs"); - assert!(inserted_resource.is_some()); - let resource = inserted_resource.unwrap(); - assert_eq!(resource.content_type, "application/javascript"); - assert_eq!(resource.data, script); - } - - #[test] - fn document() { - let filters = vec![ - String::from("||example.com$document"), - String::from("@@||sub.example.com$document"), - ]; - - let engine = Engine::from_rules_debug(&filters, Default::default()); - - assert!(engine.check_network_urls("https://example.com", "https://example.com", "document").matched); - assert!(!engine.check_network_urls("https://example.com", "https://example.com", "script").matched); - assert!(engine.check_network_urls("https://sub.example.com", "https://sub.example.com", "document").exception.is_some()); - } - - #[test] - fn implicit_all() { - { - let engine = Engine::from_rules_debug(&vec![String::from("||example.com^")], Default::default()); - assert!(engine.check_network_urls("https://example.com", "https://example.com", "document").matched); - } - { - let engine = Engine::from_rules_debug(&vec![String::from("||example.com^$first-party")], Default::default()); - assert!(engine.check_network_urls("https://example.com", "https://example.com", "document").matched); - } - { - let engine = Engine::from_rules_debug(&vec![String::from("||example.com^$script")], Default::default()); - assert!(!engine.check_network_urls("https://example.com", 
"https://example.com", "document").matched); - } - { - let engine = Engine::from_rules_debug(&vec![String::from("||example.com^$~script")], Default::default()); - assert!(!engine.check_network_urls("https://example.com", "https://example.com", "document").matched); - } - { - let engine = Engine::from_rules_debug(&vec![String::from("||example.com^$document"), String::from("@@||example.com^$generichide")], Default::default()); - assert!(engine.check_network_urls("https://example.com", "https://example.com", "document").matched); - } - { - let engine = Engine::from_rules_debug(&vec![String::from("example.com")], ParseOptions { format: FilterFormat::Hosts, ..Default::default() }); - assert!(engine.check_network_urls("https://example.com", "https://example.com", "document").matched); - } - { - let engine = Engine::from_rules_debug(&vec![String::from("||example.com/path")], Default::default()); - assert!(!engine.check_network_urls("https://example.com/path", "https://example.com/path", "document").matched); - } - { - let engine = Engine::from_rules_debug(&vec![String::from("||example.com/path^")], Default::default()); - assert!(!engine.check_network_urls("https://example.com/path", "https://example.com/path", "document").matched); - } - } - - #[test] - fn generichide() { - let filters = vec![ - String::from("##.donotblock"), - String::from("##a[href=\"generic.com\"]"), - - String::from("@@||example.com$generichide"), - String::from("example.com##.block"), - - String::from("@@||example2.com/test.html$generichide"), - String::from("example2.com##.block"), - ]; - let url_results = vec![ - ("https://example.com", vec![".block"], true), - ("https://example.com/test.html", vec![".block"], true), - ("https://example2.com", vec![".block", "a[href=\"generic.com\"]"], false), - ("https://example2.com/test.html", vec![".block"], true), - ]; - - let engine = Engine::from_rules(&filters, Default::default()); - - url_results.into_iter().for_each(|(url, expected_result, 
expected_generichide)| { - let result = engine.url_cosmetic_resources(url); - assert_eq!(result.hide_selectors, expected_result.iter().map(|s| s.to_string()).collect::>()); - assert_eq!(result.generichide, expected_generichide); - }); - } - - #[test] - fn important_redirect() { - let mut filter_set = FilterSet::new(true); - filter_set.add_filters(&vec![ - "||addthis.com^$important,3p,domain=~missingkids.com|~missingkids.org|~sainsburys.jobs|~sitecore.com|~amd.com".to_string(), - "||addthis.com/*/addthis_widget.js$script,redirect=addthis.com/addthis_widget.js".to_string(), - ], Default::default()); - let mut engine = Engine::from_filter_set(filter_set, false); - - engine.add_resource(Resource { - name: "addthis.com/addthis_widget.js".to_owned(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("window.addthis = undefined"), - }).unwrap(); - - let result = engine.check_network_urls("https://s7.addthis.com/js/250/addthis_widget.js?pub=resto", "https://www.rhmodern.com/catalog/product/product.jsp?productId=prod14970086&categoryId=cat7150028", "script"); - - assert!(result.redirect.is_some()); - } - - #[test] - fn check_match_case_regex_filtering() { - { - // match case without regex is discarded - let engine = Engine::from_rules_debug(&vec![String::from("ad.png$match-case")], Default::default()); - assert!(!engine.check_network_urls("https://example.com/ad.png", "https://example.com", "image").matched); - } - { - // /^https:\/\/[0-9a-z]{3,}\.[-a-z]{10,}\.(?:li[fv]e|top|xyz)\/[a-z]{8}\/\?utm_campaign=\w{40,}/$doc,match-case,domain=life|live|top|xyz - let engine = Engine::from_rules_debug(&vec![String::from(r#"/^https:\/\/[0-9a-z]{3,}\.[-a-z]{10,}\.(?:li[fv]e|top|xyz)\/[a-z]{8}\/\?utm_campaign=\w{40,}/$doc,match-case,domain=life|live|top|xyz"#)], Default::default()); - assert!(engine.check_network_urls("https://www.exampleaaa.xyz/testtest/?utm_campaign=aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd", 
"https://www.exampleaaa.xyz/testtest/?utm_campaign=aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd", "document").matched); - } - // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 - /*{ - // /^https?:\/\/((?!www)[a-z]{3,}|\d{2})?\.?[-0-9a-z]{6,}\.[a-z]{2,6}\/(?:[a-z]{6,8}\/)?\/?\?u=[0-9a-z]{7}&o=[0-9a-z]{7}/$doc,frame,match-case,domain=buzz|com|de|fun|guru|info|life|live|mobi|online|pw|site|space|top|us|xyz - let engine = Engine::from_rules_debug(&vec![String::from(r#"/^https?:\/\/((?!www)[a-z]{3,}|\d{2})?\.?[-0-9a-z]{6,}\.[a-z]{2,6}\/(?:[a-z]{6,8}\/)?\/?\?u=[0-9a-z]{7}&o=[0-9a-z]{7}/$doc,frame,match-case,domain=buzz|com|de|fun|guru|info|life|live|mobi|online|pw|site|space|top|us|xyz"#)], Default::default()); - assert!(engine.check_network_urls("https://example.com/aaaaaa/?u=aaaaaaa&o=bbbbbbb", - "https://example.com/aaaaaa/?u=aaaaaaa&o=bbbbbbb", - "document").matched); - }*/ - // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 - /*{ - // /^https:\/\/(?:www\d\.)?[-a-z]{6,}\.(?:com|info|net|org)\/(?=[-_a-zA-Z]{0,42}\d)(?=[-_0-9a-z]{0,42}[A-Z])[-_0-9a-zA-Z]{43}\/\?cid=[-_0-9a-zA-Z]{16,36}(?:&qs\d=\S+)?&sid=[_0-9a-f]{1,32}$/$doc,match-case,domain=com|info|net|org - let engine = Engine::from_rules_debug(&vec![String::from(r#"/^https:\/\/(?:www\d\.)?[-a-z]{6,}\.(?:com|info|net|org)\/(?=[-_a-zA-Z]{0,42}\d)(?=[-_0-9a-z]{0,42}[A-Z])[-_0-9a-zA-Z]{43}\/\?cid=[-_0-9a-zA-Z]{16,36}(?:&qs\d=\S+)?&sid=[_0-9a-f]{1,32}$/$doc,match-case,domain=com|info|net|org"#)], Default::default()); - assert!(engine.check_network_urls("https://www3.example.com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddAA5/?cid=aaaaaaaaaabbbbbb&qs5=\n&sid=a", - "https://www3.example.com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddAA5/?cid=aaaaaaaaaabbbbbb&qs5=\n&sid=a", - "document").matched); - }*/ - // fails - because of non-supported look around 
operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 - /*{ - // /^https:\/\/(?:www\d\.)?[-a-z]{6,}\.(?:com|info|net|org)\/(?=[-_a-zA-Z]{0,42}\d)(?=[-_0-9a-z]{0,42}[A-Z])[-_0-9a-zA-Z]{43}\/\?sid=[_0-9a-f]{1,32}(?:&qs\d=\S+)?&cid=[-_0-9a-zA-Z]{16,36}$/$doc,match-case,domain=com|info|net|org - let engine = Engine::from_rules_debug(&vec![String::from(r#"/^https:\/\/(?:www\d\.)?[-a-z]{6,}\.(?:com|info|net|org)\/(?=[-_a-zA-Z]{0,42}\d)(?=[-_0-9a-z]{0,42}[A-Z])[-_0-9a-zA-Z]{43}\/\?cid=[-_0-9a-zA-Z]{16,36}(?:&qs\d=\S+)?&sid=[_0-9a-f]{1,32}$/$doc,match-case,domain=com|info|net|org"#)], Default::default()); - assert!(engine.check_network_urls("https://www3.example.com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddAA5/?sid=1&qs1=\n&cid=aaaaaaaaaabbbbbb", - "https://www3.example.com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddAA5/?sid=1&qs1=\n&cid=aaaaaaaaaabbbbbb", - "document").matched); - }*/ - { - // /^http:\/\/[a-z]{5}\.[a-z]{5}\.com\/[a-z]{10}\.apk$/$doc,match-case,domain=com - let engine = Engine::from_rules_debug(&vec![String::from(r#"/^http:\/\/[a-z]{5}\.[a-z]{5}\.com\/[a-z]{10}\.apk$/$doc,match-case,domain=com"#)], Default::default()); - assert!(engine.check_network_urls("http://abcde.abcde.com/aaaaabbbbb.apk", "http://abcde.abcde.com/aaaaabbbbb.apk", "document").matched); - } - // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 - /*{ - // /\/[A-Z]\/[-0-9a-z]{5,}\.com\/(?:[0-9a-f]{2}\/){3}[0-9a-f]{32}\.js$/$script,1p,match-case - let engine = Engine::from_rules_debug(&vec![String::from(r#"/\/[A-Z]\/[-0-9a-z]{5,}\.com\/(?:[0-9a-f]{2}\/){3}[0-9a-f]{32}\.js$/$script,1p,match-case"#)], Default::default()); - assert!(engine.check_network_urls("/A/aaaaa.com/aa/bb/cc/aaaaaaaabbbbbbbbccccccccdddddddd.js", - "/A/aaaaa.com/aa/bb/cc/aaaaaaaabbbbbbbbccccccccdddddddd.js", - "script").matched); - }*/ - // fails - because of non-supported look around operator in rust 
regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 - /*{ - // /^https?:\/\/(?:[a-z]{2}\.)?[0-9a-z]{7,16}\.com\/[a-z](?=[a-z]{0,25}[0-9A-Z])[0-9a-zA-Z]{3,26}\/(?:[1-5]\d{4}|[3-9]\d{3})\??(?:_=\d+|v=\d)?$/$frame,script,xhr,popup,3p,match-case - let engine = Engine::from_rules_debug(&vec![String::from(r#"/^https?:\/\/(?:[a-z]{2}\.)?[0-9a-z]{7,16}\.com\/[a-z](?=[a-z]{0,25}[0-9A-Z])[0-9a-zA-Z]{3,26}\/(?:[1-5]\d{4}|[3-9]\d{3})\??(?:_=\d+|v=\d)?$/$frame,script,xhr,popup,3p,match-case"#)], Default::default()); - assert!(engine.check_network_urls("https://aa.example.com/aAaaa/12222", - "https://aa.example.net/aAaaa/12222", - "frame").matched); - }*/ - // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 - /*{ - // /^https?:\/\/(?:[a-z]{2}\.)?[0-9a-z]{7,16}\.website\/[a-z](?=[a-z]{0,25}[0-9A-Z])[0-9a-zA-Z]{3,26}\/(?:[1-5]\d{4}|[3-9]\d{3})\??(?:_=\d+|v=\d)?$/$frame,script,xhr,popup,3p,match-case - let engine = Engine::from_rules_debug(&vec![String::from(r#"/^https?:\/\/(?:[a-z]{2}\.)?[0-9a-z]{7,16}\.website\/[a-z](?=[a-z]{0,25}[0-9A-Z])[0-9a-zA-Z]{3,26}\/(?:[1-5]\d{4}|[3-9]\d{3})\??(?:_=\d+|v=\d)?$/$frame,script,xhr,popup,3p,match-case"#)], Default::default()); - assert!(engine.check_network_urls("https://aa.example.website/aAaaa/12222", - "https://aa.example.website/aAaaa/12222", - "frame").matched); - }*/ - // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 - /*{ - // /^https?:\/\/[a-z]{8,15}\.top(\/(?:\d{1,5}|0NaN|articles?|browse|index|movie|news|pages?|static|view|web|wiki)){1,4}(?:\.html|\/)$/$frame,3p,match-case - let engine = Engine::from_rules_debug(&vec![String::from(r#"/^https?:\/\/[a-z]{8,15}\.top(\/(?:\d{1,5}|0NaN|articles?|browse|index|movie|news|pages?|static|view|web|wiki)){1,4}(?:\.html|\/)$/$frame,3p,match-case"#)], Default::default()); - 
assert!(engine.check_network_urls("https://examples.top/articles.html", - "https://examples.top/articles.html", - "frame").matched); - }*/ - { - // /^https?:\/\/[a-z]{8,15}\.top\/[a-z]{4,}\.json$/$xhr,3p,match-case - let engine = Engine::from_rules_debug(&vec![String::from(r#"/^https?:\/\/[a-z]{8,15}\.top\/[a-z]{4,}\.json$/$xhr,3p,match-case"#)], Default::default()); - assert!(engine.check_network_urls("https://examples.top/abcd.json", "https://examples.com/abcd.json", "xhr").matched); - } - // fails - inferring unescaped `$` inside regex pattern - /*{ - // /^https?:\/\/[a-z]{8,15}\.top\/[-a-z]{4,}\.css\?aHR0c[\/0-9a-zA-Z]{33,}=?=?$/$css,3p,match-case - let engine = Engine::from_rules_debug(&vec![String::from(r#"/^https?:\/\/[a-z]{8,15}\.top\/[-a-z]{4,}\.css\?aHR0c[\/0-9a-zA-Z]{33,}=?=?$/$css,3p,match-case"#)], Default::default()); - assert!(engine.check_network_urls("https://examples.top/abcd.css?aHR0c/aaaaaaaaaaAAAAAAAAAA000000000012==", - "https://examples.com/abcd.css?aHR0c/aaaaaaaaaaAAAAAAAAAA000000000012==", - "stylesheet").matched); - }*/ - // fails - inferring unescaped `$` inside regex pattern - /*{ - // /^https?:\/\/[a-z]{8,15}\.top\/[a-z]{4,}\.png\?aHR0c[\/0-9a-zA-Z]{33,}=?=?$/$image,3p,match-case - let engine = Engine::from_rules_debug(&vec![String::from(r#"/^https?:\/\/[a-z]{8,15}\.top\/[a-z]{4,}\.png\?aHR0c[\/0-9a-zA-Z]{33,}=?=?$/$image,3p,match-case"#)], Default::default()); - assert!(engine.check_network_urls("https://examples.top/abcd.png?aHR0c/aaaaaaaaaaAAAAAAAAAA000000000012==", - "https://examples.com/abcd.png?aHR0c/aaaaaaaaaaAAAAAAAAAA000000000012==", - "image").matched); - }*/ - // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 - /*{ - // /^https?:\/\/[a-z]{8,15}\.xyz(\/(?:\d{1,5}|0NaN|articles?|browse|index|movie|news|pages?|static|view|web|wiki)){1,4}(?:\.html|\/)$/$frame,3p,match-case - let engine = 
Engine::from_rules_debug(&vec![String::from(r#"/^https?:\/\/[a-z]{8,15}\.xyz(\/(?:\d{1,5}|0NaN|articles?|browse|index|movie|news|pages?|static|view|web|wiki)){1,4}(?:\.html|\/)$/$frame,3p,match-case"#)], Default::default()); - assert!(engine.check_network_urls("https://examples.xyz/articles.html", - "https://examples.xyz/articles.html", - "frame").matched); - }*/ - { - // /^https?:\/\/cdn\.[a-z]{4,6}\.xyz\/app\.js$/$script,3p,match-case - let engine = Engine::from_rules_debug(&vec![String::from(r#"/^https?:\/\/cdn\.[a-z]{4,6}\.xyz\/app\.js$/$script,3p,match-case"#)], Default::default()); - assert!(engine.check_network_urls("https://cdn.abcde.xyz/app.js", - "https://cdn.abcde.com/app.js", - "script").matched); - } - // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 - /*{ - // /^https:\/\/a\.[-0-9a-z]{4,16}\.(?:club|com?|cyou|info|net|ru|site|top?|xxx|xyz)\/(?=[a-z]{0,6}[0-9A-Z])[0-9a-zA-Z]{7}\.js$/$script,match-case - let engine = Engine::from_rules_debug(&vec![String::from(r#"/^https:\/\/a\.[-0-9a-z]{4,16}\.(?:club|com?|cyou|info|net|ru|site|top?|xxx|xyz)\/(?=[a-z]{0,6}[0-9A-Z])[0-9a-zA-Z]{7}\.js$/$script,match-case"#)], Default::default()); - assert!(engine.check_network_urls("https://a.abcd.club/aaaaaaA.js", - "https://a.abcd.club/aaaaaaA.js", - "script").matched); - }*/ - { - // /^https:\/\/cdn\.jsdelivr\.net\/npm\/[-a-z_]{4,22}@latest\/dist\/script\.min\.js$/$script,3p,match-case - let engine = Engine::from_rules_debug(&vec![String::from(r#"/^https:\/\/cdn\.jsdelivr\.net\/npm\/[-a-z_]{4,22}@latest\/dist\/script\.min\.js$/$script,3p,match-case"#)], Default::default()); - assert!(engine.check_network_urls("https://cdn.jsdelivr.net/npm/abcd@latest/dist/script.min.js", - "https://cdn.jsdelivr.com/npm/abcd@latest/dist/script.min.js", - "script").matched); - } - // fails - inferring unescaped `$` inside regex pattern - /*{ - // 
/^https?:\/\/[-.0-9a-z]+\/script\.js$/$script,1p,strict3p,match-case - let engine = Engine::from_rules_debug(&vec![String::from(r#"/^https?:\/\/[-.0-9a-z]+\/script\.js$/$script,1p,strict3p,match-case"#)], Default::default()); - assert!(engine.check_network_urls("https://www.example.com/script.js", - "https://www.abc.com/script.js", - "script").matched); - }*/ - { - let engine = Engine::from_rules_debug(&vec![String::from(r#"/tesT߶/$domain=example.com"#)], Default::default()); - assert!(engine.check_network_urls("https://example.com/tesT߶", - "https://example.com", - "script").matched); - } - // fails - punycoded domain - /*{ - let engine = Engine::from_rules_debug(&vec![String::from(r#"/tesT߶/$domain=example.com"#)], Default::default()); - assert!(engine.check_network_urls("https://example-tesT߶.com/tesT", - "https://example.com", - "script").matched); - }*/ - } -} diff --git a/third_party/rust/adblock/v0_7/crate/src/resources/mod.rs b/third_party/rust/adblock/v0_7/crate/src/resources/mod.rs deleted file mode 100644 index c702cf709b8c..000000000000 --- a/third_party/rust/adblock/v0_7/crate/src/resources/mod.rs +++ /dev/null @@ -1,281 +0,0 @@ -//! In adblocking terms, [`Resource`]s are special placeholder scripts, images, -//! video files, etc. that can be returned as drop-in replacements for harmful -//! equivalents from remote servers. Resources also encompass scriptlets, which -//! can be injected into pages to inhibit malicious behavior. -//! -//! If the `resource-assembler` feature is enabled, the -#![cfg_attr(not(feature = "resource-assembler"), doc="`resource_assembler`")] -#![cfg_attr(feature = "resource-assembler", doc="[`resource_assembler`]")] -//! module will assist with the construction of [`Resource`]s directly from the uBlock Origin -//! project. 
- -#[cfg(feature = "resource-assembler")] -pub mod resource_assembler; - -mod scriptlet_resource_storage; -pub(crate) use scriptlet_resource_storage::ScriptletResourceStorage; - -use memchr::memrchr as find_char_reverse; -use serde::{Deserialize, Serialize}; -use thiserror::Error; -use std::collections::HashMap; - -/// Struct representing a resource that can be used by an adblocking engine. -/// -/// - `name`: Represents the primary name of the resource, often a filename -/// -/// - `aliases`: Represents secondary names that can be used to access the resource -/// -/// - `kind`: How to interpret the resource data within `content` -/// -/// - `content`: The resource data, encoded using standard base64 configuration -#[derive(Serialize, Deserialize)] -pub struct Resource { - pub name: String, - pub aliases: Vec, - pub kind: ResourceType, - pub content: String, -} - -/// Different ways that the data within the `content` field of a `Resource` can be interpreted. -/// -/// - `Mime(type)` - interpret the data according to the MIME type represented by `type` -/// -/// - `Template` - interpret the data as a Javascript scriptlet template, with embedded template -/// parameters in the form of `{{1}}`, `{{2}}`, etc. 
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] -#[serde(rename_all = "lowercase")] -pub enum ResourceType { - Mime(MimeType), - Template, -} - -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] -#[serde(into = "String")] -#[serde(from = "std::borrow::Cow<'static, str>")] -pub enum MimeType { - TextCss, - ImageGif, - TextHtml, - ApplicationJavascript, - AudioMp3, - VideoMp4, - ImagePng, - TextPlain, - TextXml, - Unknown, -} - -#[derive(Debug, Error, PartialEq)] -pub enum AddResourceError { - #[error("invalid base64 content")] - InvalidBase64Content, - #[error("invalid utf-8 content")] - InvalidUtf8Content, -} - -impl From for AddResourceError { - fn from(_: base64::DecodeError) -> Self { - AddResourceError::InvalidBase64Content - } -} - -impl From for AddResourceError { - fn from(_: std::string::FromUtf8Error) -> Self { - AddResourceError::InvalidUtf8Content - } -} - -#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] -pub struct RedirectResource { - pub content_type: String, - pub data: String, -} - -#[derive(Serialize, Deserialize, Debug, PartialEq, Default)] -pub struct RedirectResourceStorage { - #[serde(serialize_with = "crate::data_format::utils::stabilize_hashmap_serialization")] - pub resources: HashMap, -} - -impl MimeType { - /// Infers a resource's MIME type according to the extension of its path - pub fn from_extension(resource_path: &str) -> Self { - if let Some(extension_index) = find_char_reverse(b'.', resource_path.as_bytes()) { - match &resource_path[extension_index + 1..] 
{ - "css" => MimeType::TextCss, - "gif" => MimeType::ImageGif, - "html" => MimeType::TextHtml, - "js" => MimeType::ApplicationJavascript, - "mp3" => MimeType::AudioMp3, - "mp4" => MimeType::VideoMp4, - "png" => MimeType::ImagePng, - "txt" => MimeType::TextPlain, - "xml" => MimeType::TextXml, - _ => { - #[cfg(test)] - eprintln!("Unrecognized file extension on: {:?}", resource_path); - MimeType::Unknown - } - } - } else { - MimeType::Unknown - } - } -} - -impl RedirectResourceStorage { - pub fn from_resources(resources: &[Resource]) -> Self { - let mut redirectable_resources: HashMap = HashMap::new(); - - resources - .iter() - .filter_map(|descriptor| { - if let ResourceType::Mime(ref content_type) = descriptor.kind { - let resource = RedirectResource { - content_type: content_type.clone().into(), - data: descriptor.content.to_owned(), - }; - Some(( - descriptor.name.to_owned(), - descriptor.aliases.to_owned(), - resource, - )) - } else { - None - } - }) - .for_each(|(name, res_aliases, resource)| { - res_aliases.iter().for_each(|alias| { - redirectable_resources.insert(alias.to_owned(), resource.clone()); - }); - redirectable_resources.insert(name, resource); - }); - - Self { - resources: redirectable_resources, - } - } - - pub fn get_resource(&self, name: &str) -> Option<&RedirectResource> { - self.resources.get(name) - } - - /// Adds a resource. Only has an effect for mimetyped scriptlets. 
- pub fn add_resource(&mut self, resource: &Resource) -> Result<(), AddResourceError> { - if let ResourceType::Mime(ref content_type) = resource.kind { - // Ensure the resource contents are valid base64 - let decoded = base64::decode(&resource.content)?; - match content_type { - // Ensure any text contents are also valid utf8 - MimeType::ApplicationJavascript - | MimeType::TextPlain - | MimeType::TextHtml - | MimeType::TextXml => { - let _ = String::from_utf8(decoded)?; - } - _ => (), - } - - let name = resource.name.to_owned(); - let redirect_resource = RedirectResource { - content_type: content_type.clone().into(), - data: resource.content.to_owned(), - }; - resource.aliases.iter().for_each(|alias| { - self.resources - .insert(alias.to_owned(), redirect_resource.clone()); - }); - self.resources.insert(name, redirect_resource); - } - Ok(()) - } -} - -impl From> for MimeType { - fn from(v: std::borrow::Cow<'static, str>) -> Self { - v.as_ref().into() - } -} - -impl From<&str> for MimeType { - fn from(v: &str) -> Self { - match v { - "text/css" => MimeType::TextCss, - "image/gif" => MimeType::ImageGif, - "text/html" => MimeType::TextHtml, - "application/javascript" => MimeType::ApplicationJavascript, - "audio/mp3" => MimeType::AudioMp3, - "video/mp4" => MimeType::VideoMp4, - "image/png" => MimeType::ImagePng, - "text/plain" => MimeType::TextPlain, - "text/xml" => MimeType::TextXml, - _ => MimeType::Unknown, - } - } -} - -impl From for String { - fn from(v: MimeType) -> Self { - match v { - MimeType::TextCss => "text/css", - MimeType::ImageGif => "image/gif", - MimeType::TextHtml => "text/html", - MimeType::ApplicationJavascript => "application/javascript", - MimeType::AudioMp3 => "audio/mp3", - MimeType::VideoMp4 => "video/mp4", - MimeType::ImagePng => "image/png", - MimeType::TextPlain => "text/plain", - MimeType::TextXml => "text/xml", - MimeType::Unknown => "application/octet-stream", - } - .to_owned() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - 
#[test] - fn get_resource_by_name() { - let mut storage = RedirectResourceStorage::default(); - storage - .add_resource(&Resource { - name: "name.js".to_owned(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("resource data"), - }) - .unwrap(); - - assert_eq!( - storage.get_resource("name.js"), - Some(&RedirectResource { - content_type: "application/javascript".to_owned(), - data: base64::encode("resource data"), - }) - ); - } - - #[test] - fn get_resource_by_alias() { - let mut storage = RedirectResourceStorage::default(); - storage - .add_resource(&Resource { - name: "name.js".to_owned(), - aliases: vec!["alias.js".to_owned()], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("resource data"), - }) - .unwrap(); - - assert_eq!( - storage.get_resource("alias.js"), - Some(&RedirectResource { - content_type: "application/javascript".to_owned(), - data: base64::encode("resource data"), - }) - ); - } -} diff --git a/third_party/rust/adblock/v0_7/crate/src/resources/scriptlet_resource_storage.rs b/third_party/rust/adblock/v0_7/crate/src/resources/scriptlet_resource_storage.rs deleted file mode 100644 index 24bba43c4822..000000000000 --- a/third_party/rust/adblock/v0_7/crate/src/resources/scriptlet_resource_storage.rs +++ /dev/null @@ -1,341 +0,0 @@ -use std::collections::HashMap; - -use once_cell::sync::Lazy; -use regex::Regex; -use serde::{Deserialize, Serialize}; - -use crate::resources::{AddResourceError, MimeType, Resource, ResourceType}; - -static TEMPLATE_ARGUMENT_RE: [Lazy; 9] = [ - Lazy::new(|| template_argument_regex(1)), - Lazy::new(|| template_argument_regex(2)), - Lazy::new(|| template_argument_regex(3)), - Lazy::new(|| template_argument_regex(4)), - Lazy::new(|| template_argument_regex(5)), - Lazy::new(|| template_argument_regex(6)), - Lazy::new(|| template_argument_regex(7)), - Lazy::new(|| template_argument_regex(8)), - Lazy::new(|| 
template_argument_regex(9)), -]; - -fn template_argument_regex(i: usize) -> Regex { - Regex::new(&format!(r"\{{\{{{}\}}\}}", i)).unwrap() -} - -#[derive(Debug, PartialEq)] -pub enum ScriptletResourceError { - NoMatchingScriptlet, - MissingScriptletName, -} - -#[derive(Clone, Deserialize, Serialize)] -pub struct ScriptletResource { - scriptlet: String, -} - -impl ScriptletResource { - /// Omit the 0th element of `args` (the scriptlet name) when calling this method. - fn patch(&self, args: &[impl AsRef]) -> String { - let mut scriptlet = self.scriptlet.to_owned(); - // `regex` treats `$` as a special character. Instead, `$$` is interpreted as a literal `$` - // character. - args.iter().enumerate().for_each(|(i, arg)| { - scriptlet = TEMPLATE_ARGUMENT_RE[i] - .replace(&scriptlet, arg.as_ref().replace('$', "$$")) - .to_string(); - }); - scriptlet - } -} - -#[derive(Default, Deserialize, Serialize)] -pub struct ScriptletResourceStorage { - #[serde(serialize_with = "crate::data_format::utils::stabilize_hashmap_serialization")] - resources: HashMap, -} - -impl ScriptletResourceStorage { - /// Convenience constructor that allows building storage for many resources at once, printing - /// any errors that occur. - #[cfg(test)] - pub fn from_resources(resources: &[Resource]) -> Self { - let mut self_ = Self::default(); - - resources.iter().for_each(|resource| { - self_ - .add_resource(&resource) - .unwrap_or_else(|_e| eprintln!("Failed to add resource: {:?}", _e)) - }); - - self_ - } - - /// Adds a resource. Only has an effect for application/javascript mimetypes and template - /// scriptlets. 
- pub fn add_resource(&mut self, resource: &Resource) -> Result<(), AddResourceError> { - let scriptlet = match resource.kind { - ResourceType::Mime(MimeType::ApplicationJavascript) | ResourceType::Template => { - let scriptlet = ScriptletResource { - scriptlet: String::from_utf8(base64::decode(&resource.content)?)?, - }; - Some(( - resource.name.to_owned(), - resource.aliases.to_owned(), - scriptlet, - )) - } - _ => None, - }; - - if let Some((name, res_aliases, resource)) = scriptlet { - res_aliases.iter().for_each(|alias| { - self.resources - .insert(without_js_extension(alias).to_owned(), resource.clone()); - }); - self.resources - .insert(without_js_extension(&name).to_owned(), resource); - }; - - Ok(()) - } - - pub fn get_scriptlet(&self, scriptlet_args: &str) -> Result { - let scriptlet_args = parse_scriptlet_args(scriptlet_args); - if scriptlet_args.is_empty() { - return Err(ScriptletResourceError::MissingScriptletName); - } - let scriptlet_name = without_js_extension(scriptlet_args[0].as_ref()); - let args = &scriptlet_args[1..]; - let template = self - .resources - .get(scriptlet_name) - .ok_or(ScriptletResourceError::NoMatchingScriptlet)?; - - Ok(template.patch(args)) - } -} - -fn without_js_extension(scriptlet_name: &str) -> &str { - if let Some(stripped) = scriptlet_name.strip_suffix(".js") { - stripped - } else { - scriptlet_name - } -} - -/// Parses the inner contents of a `+js(...)` block into a Vec of its comma-delimited elements. -/// -/// A literal comma is produced by the '\,' pattern. Otherwise, all '\', '"', and ''' characters -/// are erased in the resulting arguments. 
-pub fn parse_scriptlet_args(args: &str) -> Vec { - static ESCAPE_SCRIPTLET_ARG_RE: Lazy = Lazy::new(|| Regex::new(r#"[\\'"]"#).unwrap()); - - // Guarantee that the last character is not a backslash - let args = args.trim_end_matches('\\'); - - let mut args_vec = vec![]; - if args.trim().len() == 0 { - return args_vec; - } - - let mut after_last_delim = 0; - - let comma_positions = memchr::memchr_iter(b',', args.as_bytes()); - let mut continuation = None; - for comma_pos in comma_positions.chain(std::iter::once(args.len())) { - let mut part = &args[after_last_delim..comma_pos]; - let mut is_continuation = false; - - if part.len() > 0 && part.as_bytes()[part.len() - 1] == b'\\' { - part = &part[0..part.len() - 1]; - is_continuation = true; - } - - let mut target = if let Some(s) = continuation.take() { - String::from(s) - } else { - String::new() - }; - - target += part; - if is_continuation { - target += ","; - continuation = Some(target); - } else { - args_vec.push(ESCAPE_SCRIPTLET_ARG_RE.replace_all(&target, "\\$0").trim().to_string()); - } - - after_last_delim = comma_pos + 1; - } - - args_vec -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn parse_argslist() { - let args = parse_scriptlet_args("scriptlet, hello world, foobar"); - assert_eq!(args, vec!["scriptlet", "hello world", "foobar"]); - } - - #[test] - fn parse_argslist_noargs() { - let args = parse_scriptlet_args("scriptlet"); - assert_eq!(args, vec!["scriptlet"]); - } - - #[test] - fn parse_argslist_empty() { - let args = parse_scriptlet_args(""); - assert!(args.is_empty()); - } - - #[test] - fn parse_argslist_commas() { - let args = parse_scriptlet_args("scriptletname, one\\, two\\, three, four"); - assert_eq!(args, vec!["scriptletname", "one, two, three", "four"]); - } - - #[test] - fn parse_argslist_badchars() { - let args = parse_scriptlet_args( - r##"scriptlet, "; window.location.href = bad.com; , '; alert("you're\, hacked"); , \u\r\l(bad.com) "##, - ); - assert_eq!( - args, - vec![ 
- r#"scriptlet"#, - r#"\"; window.location.href = bad.com;"#, - r#"\'; alert(\"you\'re, hacked\");"#, - r#"\\u\\r\\l(bad.com)"# - ] - ); - } - - - #[test] - fn get_patched_scriptlets() { - let mut resources = HashMap::new(); - resources.insert( - "greet".to_owned(), - ScriptletResource { - scriptlet: "console.log('Hello {{1}}, my name is {{2}}')".to_owned(), - }, - ); - resources.insert( - "alert".to_owned(), - ScriptletResource { - scriptlet: "alert('{{1}}')".to_owned(), - }, - ); - resources.insert( - "blocktimer".to_owned(), - ScriptletResource { - scriptlet: "setTimeout(blockAds, {{1}})".to_owned(), - }, - ); - resources.insert( - "null".to_owned(), - ScriptletResource { - scriptlet: "(()=>{})()".to_owned(), - }, - ); - resources.insert( - "set-local-storage-item".to_owned(), - ScriptletResource { - scriptlet: r#"{{1}} that dollar signs in {{2}} are untouched"#.to_owned(), - } - ); - let scriptlets = ScriptletResourceStorage { resources }; - - assert_eq!( - scriptlets.get_scriptlet("greet, world, adblock-rust"), - Ok("console.log('Hello world, my name is adblock-rust')".into()) - ); - assert_eq!( - scriptlets.get_scriptlet("alert, All systems are go!! 
"), - Ok("alert('All systems are go!!')".into()) - ); - assert_eq!( - scriptlets.get_scriptlet("alert, Uh oh\\, check the logs..."), - Ok("alert('Uh oh, check the logs...')".into()) - ); - assert_eq!( - scriptlets.get_scriptlet(r#"alert, this has "quotes""#), - Ok(r#"alert('this has \"quotes\"')"#.into()) - ); - assert_eq!( - scriptlets.get_scriptlet("blocktimer, 3000"), - Ok("setTimeout(blockAds, 3000)".into()) - ); - assert_eq!(scriptlets.get_scriptlet("null"), Ok("(()=>{})()".into())); - assert_eq!( - scriptlets.get_scriptlet("null, null"), - Ok("(()=>{})()".into()) - ); - assert_eq!( - scriptlets.get_scriptlet("greet, everybody"), - Ok("console.log('Hello everybody, my name is {{2}}')".into()) - ); - - assert_eq!( - scriptlets.get_scriptlet("unit-testing"), - Err(ScriptletResourceError::NoMatchingScriptlet) - ); - assert_eq!( - scriptlets.get_scriptlet(""), - Err(ScriptletResourceError::MissingScriptletName) - ); - - assert_eq!( - scriptlets.get_scriptlet("set-local-storage-item, Test, $remove$"), - Ok("Test that dollar signs in $remove$ are untouched".into()), - ); - } - - #[test] - fn parse_template_file_format() { - let scriptlets = ScriptletResourceStorage::from_resources(&[ - Resource { - name: "abort-current-inline-script.js".into(), - aliases: vec!["acis.js".into()], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("(function() {alert(\"hi\");})();"), - }, - Resource { - name: "abort-on-property-read.js".into(), - aliases: vec!["aopr".into()], - kind: ResourceType::Template, - content: base64::encode("(function() {confirm(\"Do you want to {{1}}?\");})();"), - }, - ]); - - assert_eq!( - scriptlets.get_scriptlet("aopr, code"), - Ok("(function() {confirm(\"Do you want to code?\");})();".to_owned()), - ); - - assert_eq!( - scriptlets.get_scriptlet("abort-on-property-read, write tests"), - Ok("(function() {confirm(\"Do you want to write tests?\");})();".to_owned()), - ); - - assert_eq!( - 
scriptlets.get_scriptlet("abort-on-property-read.js, block advertisements"), - Ok("(function() {confirm(\"Do you want to block advertisements?\");})();".to_owned()), - ); - - assert_eq!( - scriptlets.get_scriptlet("acis"), - Ok("(function() {alert(\"hi\");})();".to_owned()), - ); - - assert_eq!( - scriptlets.get_scriptlet("acis.js"), - Ok("(function() {alert(\"hi\");})();".to_owned()), - ); - } -} diff --git a/third_party/rust/adblock/v0_7/crate/src/serialization.rs b/third_party/rust/adblock/v0_7/crate/src/serialization.rs deleted file mode 100644 index 21314e541268..000000000000 --- a/third_party/rust/adblock/v0_7/crate/src/serialization.rs +++ /dev/null @@ -1,97 +0,0 @@ -use flate2::write::GzEncoder; -use flate2::read::GzDecoder; -use flate2::Compression; -use serde::{Serialize, Deserialize}; - -use crate::blocker::Blocker; - -// Pick version to use for serialization from cargo package version -pub const VERSION: &'static str = env!("CARGO_PKG_VERSION"); - -// Helper structs to use the wrapped Blocker struct with its own serialization definitions -pub struct Wrapper<'a> { - pub wrapped: &'a Blocker -} - -pub struct Unwrappable { - pub wrapped: Box -} - -impl<'a> serde::Serialize for Wrapper<'a> { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - // An intermediate struct that adds manifest version and includes - // the wrapped structure already encoded. 
Allows for checking of - // `manifest-version` before any other field gets decoded - #[derive(Serialize)] - struct EncodedBlocker<'b> { - #[serde(rename = "manifest-version")] - manifest_version: &'b str, - blocker: &'b Vec - } - - let mut gz = GzEncoder::new(Vec::new(), Compression::default()); - - bincode::serialize_into(&mut gz, &self) - .or_else(|e| { - Err(D::Error::invalid_value(::serde::de::Unexpected::Other("Failed to serialize to bincode"), &e.to_string().as_str())) - })?; - - let compressed = gz.finish().unwrap(); - .or_else(|e| { - Err(D::Error::invalid_value(::serde::de::Unexpected::Other("Failed to finish Gzip encoding"), &e.to_string().as_str())) - })?; - - let output = EncodedBlocker { - // Pick version to use for serialization from cargo package version - manifest_version: VERSION, - blocker: &compressed, - }; - - // Once again, serde does all the hard work for us - output.serialize(serializer) - } -} - -impl<'de> serde::Deserialize<'de> for Unwrappable { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - use serde::de::Error; - - // An intermediate struct that exactly matches the input schema. - #[derive(Deserialize)] - struct EncodedBlocker { - #[serde(rename = "manifest-version")] - pub manifest_version: String, - pub blocker: Vec - } - - // Because we derived Deserialize automatically, - // serde does all the hard work for us. - let input = EncodedBlocker::deserialize(deserializer)?; - - // Validating the manifest_version field is straightforward. 
- if input.manifest_version != VERSION { - return Err(D::Error::invalid_value( - ::serde::de::Unexpected::Str(&input.manifest_version), &VERSION - )); - } - - let gz = GzDecoder::new(&input.blocker[..]); - let blocker = bincode::deserialize_from(gz) - .or_else(|e| { - Err(D::Error::invalid_value(::serde::de::Unexpected::Other("Failed to parse bincode formatted data"), &e.to_string().as_str())) - })?; - - // Finally, we move all the data into an instance - // of our wrapper struct. - Ok(Unwrappable { - wrapped: Box::new(blocker) - }) - } -} - diff --git a/third_party/rust/adblock/v0_7/BUILD.gn b/third_party/rust/adblock/v0_8/BUILD.gn similarity index 88% rename from third_party/rust/adblock/v0_7/BUILD.gn rename to third_party/rust/adblock/v0_8/BUILD.gn index a4b7ef605dad..854d22a7a4d1 100644 --- a/third_party/rust/adblock/v0_7/BUILD.gn +++ b/third_party/rust/adblock/v0_8/BUILD.gn @@ -6,15 +6,15 @@ import("//build/rust/cargo_crate.gni") cargo_crate("lib") { crate_name = "adblock" - epoch = "0.7" + epoch = "0.8" crate_type = "rlib" crate_root = "crate/src/lib.rs" # Unit tests skipped. Generate with --with-tests to include them. 
build_native_rust_unit_tests = false sources = [ "crate/src/lib.rs" ] - edition = "2018" - cargo_pkg_version = "0.7.17" + edition = "2021" + cargo_pkg_version = "0.8.0" cargo_pkg_authors = "Andrius Aucinas , Anton Lazarev " cargo_pkg_name = "adblock" @@ -25,11 +25,9 @@ cargo_crate("lib") { executable_configs += [ "//build/config/compiler:no_chromium_code" ] deps = [ "//brave/third_party/rust/cssparser/v0_28:lib", - "//brave/third_party/rust/flate2/v1:lib", "//brave/third_party/rust/idna/v0_2:lib", "//brave/third_party/rust/itertools/v0_10:lib", "//brave/third_party/rust/percent_encoding/v2:lib", - "//brave/third_party/rust/rmp_serde/v0_13:lib", "//brave/third_party/rust/rmp_serde/v0_15:lib", "//brave/third_party/rust/seahash/v3:lib", "//brave/third_party/rust/selectors/v0_23:lib", @@ -42,14 +40,11 @@ cargo_crate("lib") { "//third_party/rust/regex/v1:lib", "//third_party/rust/serde/v1:lib", ] - aliased_deps = { - rmp_serde_legacy = "//brave/third_party/rust/rmp_serde/v0_13:lib__rlib" - } features = [ "css-validation", "cssparser", - "debug-info", "full-regex-handling", + "regex-debug-info", "selectors", ] if (is_ios) { diff --git a/third_party/rust/adblock/v0_7/README.chromium b/third_party/rust/adblock/v0_8/README.chromium similarity index 76% rename from third_party/rust/adblock/v0_7/README.chromium rename to third_party/rust/adblock/v0_8/README.chromium index 50be34c7bb49..73e30e2160aa 100644 --- a/third_party/rust/adblock/v0_7/README.chromium +++ b/third_party/rust/adblock/v0_8/README.chromium @@ -1,7 +1,7 @@ Name: adblock URL: https://crates.io/crates/adblock Description: Native Rust module for Adblock Plus syntax (e.g. EasyList, EasyPrivacy) filter parsing and matching. 
-Version: 0.7.17 +Version: 0.8.0 Security Critical: no License: Mozilla Public License 2.0 -Revision: 735027feb6b617851732482d42bc673b15e5b968 +Revision: cb0dd04c193cf301403d7596bee326f4a0a1658b diff --git a/third_party/rust/adblock/v0_8/crate/.cargo_vcs_info.json b/third_party/rust/adblock/v0_8/crate/.cargo_vcs_info.json new file mode 100644 index 000000000000..caa9f4aa8557 --- /dev/null +++ b/third_party/rust/adblock/v0_8/crate/.cargo_vcs_info.json @@ -0,0 +1,6 @@ +{ + "git": { + "sha1": "cb0dd04c193cf301403d7596bee326f4a0a1658b" + }, + "path_in_vcs": "" +} \ No newline at end of file diff --git a/third_party/rust/adblock/v0_7/crate/Cargo.lock b/third_party/rust/adblock/v0_8/crate/Cargo.lock similarity index 97% rename from third_party/rust/adblock/v0_7/crate/Cargo.lock rename to third_party/rust/adblock/v0_8/crate/Cargo.lock index 67f6c6266508..67000c8cb25f 100644 --- a/third_party/rust/adblock/v0_7/crate/Cargo.lock +++ b/third_party/rust/adblock/v0_8/crate/Cargo.lock @@ -4,7 +4,7 @@ version = 3 [[package]] name = "adblock" -version = "0.7.17" +version = "0.8.0" dependencies = [ "addr", "base64", @@ -12,7 +12,6 @@ dependencies = [ "criterion", "cssparser", "csv", - "flate2", "futures", "idna 0.2.3", "itertools", @@ -23,8 +22,7 @@ dependencies = [ "percent-encoding", "regex", "reqwest", - "rmp-serde 0.13.7", - "rmp-serde 0.15.5", + "rmp-serde", "seahash", "selectors", "serde", @@ -45,12 +43,6 @@ dependencies = [ "psl-types", ] -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - [[package]] name = "aho-corasick" version = "0.7.19" @@ -215,15 +207,6 @@ dependencies = [ "libc", ] -[[package]] -name = "crc32fast" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" -dependencies = [ - "cfg-if", -] - [[package]] name = 
"criterion" version = "0.4.0" @@ -406,16 +389,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "flate2" -version = "1.0.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6" -dependencies = [ - "crc32fast", - "miniz_oxide", -] - [[package]] name = "fnv" version = "1.0.7" @@ -782,15 +755,6 @@ version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" -[[package]] -name = "miniz_oxide" -version = "0.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96590ba8f175222643a85693f33d26e9c8a015f599c216509b1a6894af675d34" -dependencies = [ - "adler", -] - [[package]] name = "mio" version = "0.8.4" @@ -1181,17 +1145,6 @@ dependencies = [ "paste", ] -[[package]] -name = "rmp-serde" -version = "0.13.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "011e1d58446e9fa3af7cdc1fb91295b10621d3ac4cb3a85cc86385ee9ca50cd3" -dependencies = [ - "byteorder", - "rmp", - "serde", -] - [[package]] name = "rmp-serde" version = "0.15.5" diff --git a/third_party/rust/adblock/v0_7/crate/Cargo.toml b/third_party/rust/adblock/v0_8/crate/Cargo.toml similarity index 92% rename from third_party/rust/adblock/v0_7/crate/Cargo.toml rename to third_party/rust/adblock/v0_8/crate/Cargo.toml index d0a3b5856c08..8a94a144446a 100644 --- a/third_party/rust/adblock/v0_7/crate/Cargo.toml +++ b/third_party/rust/adblock/v0_8/crate/Cargo.toml @@ -10,9 +10,9 @@ # See Cargo.toml.orig for the original contents. 
[package] -edition = "2018" +edition = "2021" name = "adblock" -version = "0.7.17" +version = "0.8.0" authors = [ "Andrius Aucinas ", "Anton Lazarev ", @@ -73,11 +73,6 @@ version = "1.2" version = "0.28" optional = true -[dependencies.flate2] -version = "1.0" -features = ["rust_backend"] -default-features = false - [dependencies.idna] version = "0.2" @@ -103,10 +98,6 @@ version = "1.5" [dependencies.rmp-serde] version = "0.15" -[dependencies.rmp-serde-legacy] -version = "0.13.7" -package = "rmp-serde" - [dependencies.seahash] version = "3" @@ -165,7 +156,6 @@ css-validation = [ "cssparser", "selectors", ] -debug-info = [] default = [ "embedded-domain-resolver", "full-regex-handling", @@ -174,7 +164,7 @@ default = [ ] embedded-domain-resolver = ["addr"] full-regex-handling = [] -metrics = [] object-pooling = ["lifeguard"] +regex-debug-info = [] resource-assembler = ["serde_json"] unsync-regex-caching = [] diff --git a/third_party/rust/adblock/v0_7/crate/Cargo.toml.orig b/third_party/rust/adblock/v0_8/crate/Cargo.toml.orig similarity index 89% rename from third_party/rust/adblock/v0_7/crate/Cargo.toml.orig rename to third_party/rust/adblock/v0_8/crate/Cargo.toml.orig index 5264aabe2858..d2edd45e4307 100644 --- a/third_party/rust/adblock/v0_7/crate/Cargo.toml.orig +++ b/third_party/rust/adblock/v0_8/crate/Cargo.toml.orig @@ -1,8 +1,8 @@ [package] name = "adblock" -version = "0.7.17" +version = "0.8.0" authors = ["Andrius Aucinas ", "Anton Lazarev "] -edition = "2018" +edition = "2021" description = "Native Rust module for Adblock Plus syntax (e.g. EasyList, EasyPrivacy) filter parsing and matching." 
repository = "https://github.com/brave/adblock-rust/" @@ -32,11 +32,9 @@ bitflags = "1.2" itertools = "0.10" idna = "0.2" serde = { version = "1.0", features = ["derive", "rc"] } -flate2 = { version = "1.0", features = ["rust_backend"], default-features = false } seahash = "3" # seahash 4 introduces a breaking hash algorithm change memchr = "2.4" base64 = "0.13" -rmp-serde-legacy = { package = "rmp-serde", version = "0.13.7" } # rmp-serde 0.14.0 breaks deserialization by changing how enums are deserialized rmp-serde = "0.15" lifeguard = { version = "^ 0.6.1", optional = true } cssparser = { version = "0.28", optional = true } @@ -88,11 +86,10 @@ harness = false # If disabling default features, consider explicitly re-enabling the # "embedded-domain-resolver" feature. default = ["embedded-domain-resolver", "full-regex-handling", "object-pooling", "unsync-regex-caching"] -metrics = [] full-regex-handling = [] object-pooling = ["lifeguard"] # disables `Send` and `Sync` on `Engine`. unsync-regex-caching = [] # disables `Send` and `Sync` on `Engine`. -debug-info = [] +regex-debug-info = [] css-validation = ["cssparser", "selectors"] content-blocking = ["serde_json"] embedded-domain-resolver = ["addr"] # Requires setting an external domain resolver if disabled. 
diff --git a/third_party/rust/adblock/v0_7/crate/LICENSE b/third_party/rust/adblock/v0_8/crate/LICENSE similarity index 100% rename from third_party/rust/adblock/v0_7/crate/LICENSE rename to third_party/rust/adblock/v0_8/crate/LICENSE diff --git a/third_party/rust/adblock/v0_7/crate/README.md b/third_party/rust/adblock/v0_8/crate/README.md similarity index 100% rename from third_party/rust/adblock/v0_7/crate/README.md rename to third_party/rust/adblock/v0_8/crate/README.md diff --git a/third_party/rust/adblock/v0_7/crate/benches/bench_cosmetic_matching.rs b/third_party/rust/adblock/v0_8/crate/benches/bench_cosmetic_matching.rs similarity index 53% rename from third_party/rust/adblock/v0_7/crate/benches/bench_cosmetic_matching.rs rename to third_party/rust/adblock/v0_8/crate/benches/bench_cosmetic_matching.rs index fe17efef6586..f3a295d73c9a 100644 --- a/third_party/rust/adblock/v0_7/crate/benches/bench_cosmetic_matching.rs +++ b/third_party/rust/adblock/v0_8/crate/benches/bench_cosmetic_matching.rs @@ -3,7 +3,10 @@ use criterion::*; use adblock::cosmetic_filter_cache::CosmeticFilterCache; use adblock::lists::{parse_filters, FilterFormat}; -use adblock::utils::rules_from_lists; + +#[path = "../tests/test_utils.rs"] +mod test_utils; +use test_utils::rules_from_lists; fn by_hostname(c: &mut Criterion) { let mut group = c.benchmark_group("cosmetic-hostname-match"); @@ -12,28 +15,28 @@ fn by_hostname(c: &mut Criterion) { group.sample_size(20); group.bench_function("easylist", move |b| { - let rules = rules_from_lists(&vec!["data/easylist.to/easylist/easylist.txt".to_owned()]); + let rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); let (_, cosmetic_filters) = parse_filters(&rules, false, FilterFormat::Standard); let cfcache = CosmeticFilterCache::from_rules(cosmetic_filters); b.iter(|| cfcache.hostname_cosmetic_resources("google.com")) }); group.bench_function("many lists", move |b| { - let rules = rules_from_lists(&vec![ - 
"data/easylist.to/easylist/easylist.txt".to_owned(), - "data/easylist.to/easylistgermany/easylistgermany.txt".to_owned(), - "data/uBlockOrigin/filters.txt".to_owned(), - "data/uBlockOrigin/unbreak.txt".to_owned(), + let rules = rules_from_lists(&[ + "data/easylist.to/easylist/easylist.txt", + "data/easylist.to/easylistgermany/easylistgermany.txt", + "data/uBlockOrigin/filters.txt", + "data/uBlockOrigin/unbreak.txt", ]); let (_, cosmetic_filters) = parse_filters(&rules, false, FilterFormat::Standard); let cfcache = CosmeticFilterCache::from_rules(cosmetic_filters); b.iter(|| cfcache.hostname_cosmetic_resources("google.com")) }); group.bench_function("complex_hostname", move |b| { - let rules = rules_from_lists(&vec![ - "data/easylist.to/easylist/easylist.txt".to_owned(), - "data/easylist.to/easylistgermany/easylistgermany.txt".to_owned(), - "data/uBlockOrigin/filters.txt".to_owned(), - "data/uBlockOrigin/unbreak.txt".to_owned(), + let rules = rules_from_lists(&[ + "data/easylist.to/easylist/easylist.txt", + "data/easylist.to/easylistgermany/easylistgermany.txt", + "data/uBlockOrigin/filters.txt", + "data/uBlockOrigin/unbreak.txt", ]); let (_, cosmetic_filters) = parse_filters(&rules, false, FilterFormat::Standard); let cfcache = CosmeticFilterCache::from_rules(cosmetic_filters); @@ -50,77 +53,77 @@ fn by_classes_ids(c: &mut Criterion) { group.sample_size(20); group.bench_function("easylist", move |b| { - let rules = rules_from_lists(&vec!["data/easylist.to/easylist/easylist.txt".to_owned()]); + let rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); let (_, cosmetic_filters) = parse_filters(&rules, false, FilterFormat::Standard); let cfcache = CosmeticFilterCache::from_rules(cosmetic_filters); let exceptions = Default::default(); b.iter(|| { cfcache.hidden_class_id_selectors( - &vec!["ad".to_owned()][..], - &vec!["ad".to_owned()][..], + &["ad"], + &["ad"], &exceptions, ) }) }); group.bench_function("many lists", move |b| { - let rules = 
rules_from_lists(&vec![ - "data/easylist.to/easylist/easylist.txt".to_owned(), - "data/easylist.to/easylistgermany/easylistgermany.txt".to_owned(), - "data/uBlockOrigin/filters.txt".to_owned(), - "data/uBlockOrigin/unbreak.txt".to_owned(), + let rules = rules_from_lists(&[ + "data/easylist.to/easylist/easylist.txt", + "data/easylist.to/easylistgermany/easylistgermany.txt", + "data/uBlockOrigin/filters.txt", + "data/uBlockOrigin/unbreak.txt", ]); let (_, cosmetic_filters) = parse_filters(&rules, false, FilterFormat::Standard); let cfcache = CosmeticFilterCache::from_rules(cosmetic_filters); let exceptions = Default::default(); b.iter(|| { cfcache.hidden_class_id_selectors( - &vec!["ad".to_owned()][..], - &vec!["ad".to_owned()][..], + &["ad"], + &["ad"], &exceptions, ) }) }); group.bench_function("many matching classes and ids", move |b| { - let rules = rules_from_lists(&vec![ - "data/easylist.to/easylist/easylist.txt".to_owned(), - "data/easylist.to/easylistgermany/easylistgermany.txt".to_owned(), - "data/uBlockOrigin/filters.txt".to_owned(), - "data/uBlockOrigin/unbreak.txt".to_owned(), + let rules = rules_from_lists(&[ + "data/easylist.to/easylist/easylist.txt", + "data/easylist.to/easylistgermany/easylistgermany.txt", + "data/uBlockOrigin/filters.txt", + "data/uBlockOrigin/unbreak.txt", ]); let (_, cosmetic_filters) = parse_filters(&rules, false, FilterFormat::Standard); let cfcache = CosmeticFilterCache::from_rules(cosmetic_filters); let exceptions = Default::default(); - let class_list = vec![ - "block-bg-advertisement-region-1".to_owned(), - "photobox-adbox".to_owned(), - "headerad-720".to_owned(), - "rscontainer".to_owned(), - "rail-article-sponsored".to_owned(), - "fbPhotoSnowboxAds".to_owned(), - "sidebar_ad_module".to_owned(), - "ad-728x90_forum".to_owned(), - "commercial-unit-desktop-rhs".to_owned(), - "sponsored-editorial".to_owned(), - "rr-300x600-ad".to_owned(), - "adfoot".to_owned(), - "lads".to_owned(), + let class_list = [ + 
"block-bg-advertisement-region-1", + "photobox-adbox", + "headerad-720", + "rscontainer", + "rail-article-sponsored", + "fbPhotoSnowboxAds", + "sidebar_ad_module", + "ad-728x90_forum", + "commercial-unit-desktop-rhs", + "sponsored-editorial", + "rr-300x600-ad", + "adfoot", + "lads", ]; - let id_list = vec![ - "footer-adspace".to_owned(), - "adsponsored_links_box".to_owned(), - "lsadvert-top".to_owned(), - "mn".to_owned(), - "col-right-ad".to_owned(), - "view_ads_bottom_bg_middle".to_owned(), - "ad_468x60".to_owned(), - "rightAdColumn".to_owned(), - "content".to_owned(), - "rhs_block".to_owned(), - "center_col".to_owned(), - "header".to_owned(), - "advertisingModule160x600".to_owned(), + let id_list = [ + "footer-adspace", + "adsponsored_links_box", + "lsadvert-top", + "mn", + "col-right-ad", + "view_ads_bottom_bg_middle", + "ad_468x60", + "rightAdColumn", + "content", + "rhs_block", + "center_col", + "header", + "advertisingModule160x600", ]; - b.iter(|| cfcache.hidden_class_id_selectors(&class_list[..], &id_list[..], &exceptions)) + b.iter(|| cfcache.hidden_class_id_selectors(&class_list, &id_list, &exceptions)) }); group.finish(); diff --git a/third_party/rust/adblock/v0_7/crate/benches/bench_matching.rs b/third_party/rust/adblock/v0_8/crate/benches/bench_matching.rs similarity index 65% rename from third_party/rust/adblock/v0_7/crate/benches/bench_matching.rs rename to third_party/rust/adblock/v0_8/crate/benches/bench_matching.rs index 427918955366..155b5e4eb7cf 100644 --- a/third_party/rust/adblock/v0_7/crate/benches/bench_matching.rs +++ b/third_party/rust/adblock/v0_8/crate/benches/bench_matching.rs @@ -2,11 +2,15 @@ use criterion::*; use serde::{Deserialize, Serialize}; +use adblock::Engine; use adblock::blocker::{Blocker, BlockerOptions}; -use adblock::engine::Engine; use adblock::request::Request; +use adblock::resources::ResourceStorage; use adblock::url_parser::parse_url; -use adblock::utils::rules_from_lists; + +#[path = "../tests/test_utils.rs"] +mod 
test_utils; +use test_utils::rules_from_lists; #[allow(non_snake_case)] #[derive(Serialize, Deserialize, Clone)] @@ -16,8 +20,14 @@ struct TestRequest { cpt: String, } +impl From<&TestRequest> for Request { + fn from(v: &TestRequest) -> Self { + Request::new(&v.url, &v.frameUrl, &v.cpt).unwrap() + } +} + fn load_requests() -> Vec { - let requests_str = adblock::utils::read_file_lines("data/requests.json"); + let requests_str = rules_from_lists(&["data/requests.json"]); let reqs: Vec = requests_str .into_iter() .map(|r| serde_json::from_str(&r)) @@ -26,7 +36,7 @@ fn load_requests() -> Vec { reqs } -fn get_blocker(rules: &Vec) -> Blocker { +fn get_blocker(rules: impl IntoIterator>) -> Blocker { let (network_filters, _) = adblock::lists::parse_filters(rules, false, Default::default()); let blocker_options = BlockerOptions { @@ -40,7 +50,7 @@ fn bench_rule_matching(engine: &Engine, requests: &Vec) -> (u32, u3 let mut matches = 0; let mut passes = 0; requests.iter().for_each(|r| { - let res = engine.check_network_urls(&r.url, &r.frameUrl, &r.cpt); + let res = engine.check_network_request(&r.into()); if res.matched { matches += 1; } else { @@ -51,11 +61,11 @@ fn bench_rule_matching(engine: &Engine, requests: &Vec) -> (u32, u3 (matches, passes) } -fn bench_matching_only(blocker: &Blocker, requests: &Vec) -> (u32, u32) { +fn bench_matching_only(blocker: &Blocker, resources: &ResourceStorage, requests: &Vec) -> (u32, u32) { let mut matches = 0; let mut passes = 0; requests.iter().for_each(|parsed| { - let check = blocker.check(&parsed); + let check = blocker.check(&parsed, resources); if check.matched { matches += 1; } else { @@ -68,19 +78,19 @@ fn bench_matching_only(blocker: &Blocker, requests: &Vec) -> (u32, u32) fn bench_rule_matching_browserlike( blocker: &Engine, - requests: &Vec<(String, String, String, String, Option)>, + requests: &Vec<(String, String, String, String, bool)>, ) -> (u32, u32) { let mut matches = 0; let mut passes = 0; requests.iter().for_each( 
|(url, hostname, source_hostname, request_type, third_party)| { - let check = blocker.check_network_urls_with_hostnames( + let check = blocker.check_network_request(&Request::preparsed( &url, &hostname, &source_hostname, &request_type, *third_party, - ); + )); if check.matched { matches += 1; } else { @@ -105,21 +115,21 @@ fn rule_match(c: &mut Criterion) { group.sample_size(10); group.bench_function("el+ep", move |b| { - let rules = rules_from_lists(&vec![ - "data/easylist.to/easylist/easylist.txt".to_owned(), - "data/easylist.to/easylist/easyprivacy.txt".to_owned(), + let rules = rules_from_lists(&[ + "data/easylist.to/easylist/easylist.txt", + "data/easylist.to/easylist/easyprivacy.txt", ]); - let engine = Engine::from_rules(&rules, Default::default()); + let engine = Engine::from_rules(rules, Default::default()); b.iter(|| bench_rule_matching(&engine, &elep_req)) }); group.bench_function("easylist", move |b| { - let rules = rules_from_lists(&vec!["data/easylist.to/easylist/easylist.txt".to_owned()]); - let engine = Engine::from_rules(&rules, Default::default()); + let rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); + let engine = Engine::from_rules(rules, Default::default()); b.iter(|| bench_rule_matching(&engine, &el_req)) }); group.bench_function("slimlist", move |b| { - let rules = rules_from_lists(&vec!["data/slim-list.txt".to_owned()]); - let engine = Engine::from_rules(&rules, Default::default()); + let rules = rules_from_lists(&["data/slim-list.txt"]); + let engine = Engine::from_rules(rules, Default::default()); b.iter(|| bench_rule_matching(&engine, &slim_req)) }); @@ -129,23 +139,24 @@ fn rule_match(c: &mut Criterion) { fn rule_match_parsed_el(c: &mut Criterion) { let mut group = c.benchmark_group("rule-match-parsed"); - let rules = rules_from_lists(&vec![String::from( + let rules = rules_from_lists(&[ "data/easylist.to/easylist/easylist.txt", - )]); + ]); let requests = load_requests(); let requests_parsed: Vec<_> = requests 
.into_iter() - .map(|r| Request::from_urls(&r.url, &r.frameUrl, &r.cpt)) + .map(|r| Request::new(&r.url, &r.frameUrl, &r.cpt)) .filter_map(Result::ok) .collect(); let requests_len = requests_parsed.len() as u64; - let blocker = get_blocker(&rules); + let blocker = get_blocker(rules); + let resources = ResourceStorage::default(); group.throughput(Throughput::Elements(requests_len)); group.sample_size(10); group.bench_function("easylist", move |b| { - b.iter(|| bench_matching_only(&blocker, &requests_parsed)) + b.iter(|| bench_matching_only(&blocker, &resources, &requests_parsed)) }); group.finish(); @@ -154,27 +165,28 @@ fn rule_match_parsed_el(c: &mut Criterion) { fn rule_match_parsed_elep_slimlist(c: &mut Criterion) { let mut group = c.benchmark_group("rule-match-parsed"); - let full_rules = rules_from_lists(&vec![ - String::from("data/easylist.to/easylist/easylist.txt"), - String::from("data/easylist.to/easylist/easyprivacy.txt"), + let full_rules = rules_from_lists(&[ + "data/easylist.to/easylist/easylist.txt", + "data/easylist.to/easylist/easyprivacy.txt", ]); - let blocker = get_blocker(&full_rules); + let blocker = get_blocker(full_rules); + let resources = ResourceStorage::default(); let requests = load_requests(); let requests_parsed: Vec<_> = requests .into_iter() - .map(|r| Request::from_urls(&r.url, &r.frameUrl, &r.cpt)) + .map(|r| Request::new(&r.url, &r.frameUrl, &r.cpt)) .filter_map(Result::ok) .collect(); let requests_len = requests_parsed.len() as u64; - let slim_rules = rules_from_lists(&vec![String::from("data/slim-list.txt")]); - let slim_blocker = get_blocker(&slim_rules); + let slim_rules = rules_from_lists(&["data/slim-list.txt"]); + let slim_blocker = get_blocker(slim_rules); let requests_copy = load_requests(); let requests_parsed_copy: Vec<_> = requests_copy .into_iter() - .map(|r| Request::from_urls(&r.url, &r.frameUrl, &r.cpt)) + .map(|r| Request::new(&r.url, &r.frameUrl, &r.cpt)) .filter_map(Result::ok) .collect(); @@ -182,10 +194,11 @@ 
fn rule_match_parsed_elep_slimlist(c: &mut Criterion) { group.sample_size(10); group.bench_function("el+ep", move |b| { - b.iter(|| bench_matching_only(&blocker, &requests_parsed)) + b.iter(|| bench_matching_only(&blocker, &resources, &requests_parsed)) }); + let resources = ResourceStorage::default(); group.bench_function("slimlist", move |b| { - b.iter(|| bench_matching_only(&slim_blocker, &requests_parsed_copy)) + b.iter(|| bench_matching_only(&slim_blocker, &resources, &requests_parsed_copy)) }); group.finish(); @@ -197,26 +210,26 @@ fn serialization(c: &mut Criterion) { group.sample_size(20); group.bench_function("el+ep", move |b| { - let full_rules = rules_from_lists(&vec![ - String::from("data/easylist.to/easylist/easylist.txt"), - String::from("data/easylist.to/easylist/easyprivacy.txt"), + let full_rules = rules_from_lists(&[ + "data/easylist.to/easylist/easylist.txt", + "data/easylist.to/easylist/easyprivacy.txt", ]); - let engine = Engine::from_rules(&full_rules, Default::default()); + let engine = Engine::from_rules(full_rules, Default::default()); b.iter(|| assert!(engine.serialize_raw().unwrap().len() > 0)) }); group.bench_function("el", move |b| { - let full_rules = rules_from_lists(&vec![String::from( + let full_rules = rules_from_lists(&[ "data/easylist.to/easylist/easylist.txt", - )]); + ]); - let engine = Engine::from_rules(&full_rules, Default::default()); + let engine = Engine::from_rules(full_rules, Default::default()); b.iter(|| assert!(engine.serialize_raw().unwrap().len() > 0)) }); group.bench_function("slimlist", move |b| { - let full_rules = rules_from_lists(&vec![String::from("data/slim-list.txt")]); + let full_rules = rules_from_lists(&["data/slim-list.txt"]); - let engine = Engine::from_rules(&full_rules, Default::default()); + let engine = Engine::from_rules(full_rules, Default::default()); b.iter(|| assert!(engine.serialize_raw().unwrap().len() > 0)) }); @@ -229,12 +242,12 @@ fn deserialization(c: &mut Criterion) { 
group.sample_size(20); group.bench_function("el+ep", move |b| { - let full_rules = rules_from_lists(&vec![ - String::from("data/easylist.to/easylist/easylist.txt"), - String::from("data/easylist.to/easylist/easyprivacy.txt"), + let full_rules = rules_from_lists(&[ + "data/easylist.to/easylist/easylist.txt", + "data/easylist.to/easylist/easyprivacy.txt", ]); - let engine = Engine::from_rules(&full_rules, Default::default()); + let engine = Engine::from_rules(full_rules, Default::default()); let serialized = engine.serialize_raw().unwrap(); b.iter(|| { @@ -243,11 +256,11 @@ fn deserialization(c: &mut Criterion) { }) }); group.bench_function("el", move |b| { - let full_rules = rules_from_lists(&vec![String::from( + let full_rules = rules_from_lists(&[ "data/easylist.to/easylist/easylist.txt", - )]); + ]); - let engine = Engine::from_rules(&full_rules, Default::default()); + let engine = Engine::from_rules(full_rules, Default::default()); let serialized = engine.serialize_raw().unwrap(); b.iter(|| { @@ -256,9 +269,9 @@ fn deserialization(c: &mut Criterion) { }) }); group.bench_function("slimlist", move |b| { - let full_rules = rules_from_lists(&vec![String::from("data/slim-list.txt")]); + let full_rules = rules_from_lists(&["data/slim-list.txt"]); - let engine = Engine::from_rules(&full_rules, Default::default()); + let engine = Engine::from_rules(full_rules, Default::default()); let serialized = engine.serialize_raw().unwrap(); b.iter(|| { @@ -281,7 +294,7 @@ fn rule_match_browserlike_comparable(c: &mut Criterion) { fn requests_parsed( requests: &[TestRequest], - ) -> Vec<(String, String, String, String, Option)> { + ) -> Vec<(String, String, String, String, bool)> { requests .iter() .map(|r| { @@ -296,22 +309,21 @@ fn rule_match_browserlike_comparable(c: &mut Criterion) { let maybe_parsed_source = parse_url(&source_url_norm); - if maybe_parsed_source.is_none() { + if let Some(parsed_source) = maybe_parsed_source { Ok(( parsed_url.url.to_owned(), 
parsed_url.hostname().to_owned(), - "".to_owned(), + parsed_source.hostname().to_owned(), r.cpt.clone(), - None, + parsed_source.domain() != parsed_url.domain(), )) } else { - let parsed_source = maybe_parsed_source.unwrap(); Ok(( parsed_url.url.to_owned(), parsed_url.hostname().to_owned(), - parsed_source.hostname().to_owned(), + "".to_owned(), r.cpt.clone(), - Some(parsed_source.domain() != parsed_url.domain()), + true, )) } }) @@ -324,21 +336,21 @@ fn rule_match_browserlike_comparable(c: &mut Criterion) { let slim = elep_req.clone(); group.bench_function("el+ep", move |b| { - let rules = rules_from_lists(&vec![ - "data/easylist.to/easylist/easylist.txt".to_owned(), - "data/easylist.to/easylist/easyprivacy.txt".to_owned(), + let rules = rules_from_lists(&[ + "data/easylist.to/easylist/easylist.txt", + "data/easylist.to/easylist/easyprivacy.txt", ]); - let engine = Engine::from_rules_parametrised(&rules, Default::default(), false, true); + let engine = Engine::from_rules_parametrised(rules, Default::default(), false, true); b.iter(|| bench_rule_matching_browserlike(&engine, &elep_req)) }); group.bench_function("el", move |b| { - let rules = rules_from_lists(&vec!["data/easylist.to/easylist/easylist.txt".to_owned()]); - let engine = Engine::from_rules_parametrised(&rules, Default::default(), false, true); + let rules = rules_from_lists(&["data/easylist.to/easylist/easylist.txt"]); + let engine = Engine::from_rules_parametrised(rules, Default::default(), false, true); b.iter(|| bench_rule_matching_browserlike(&engine, &el_req)) }); group.bench_function("slimlist", move |b| { - let rules = rules_from_lists(&vec!["data/slim-list.txt".to_owned()]); - let engine = Engine::from_rules_parametrised(&rules, Default::default(), false, true); + let rules = rules_from_lists(&["data/slim-list.txt"]); + let engine = Engine::from_rules_parametrised(rules, Default::default(), false, true); b.iter(|| bench_rule_matching_browserlike(&engine, &slim)) }); diff --git 
a/third_party/rust/adblock/v0_7/crate/benches/bench_redirect_performance.rs b/third_party/rust/adblock/v0_8/crate/benches/bench_redirect_performance.rs similarity index 64% rename from third_party/rust/adblock/v0_7/crate/benches/bench_redirect_performance.rs rename to third_party/rust/adblock/v0_8/crate/benches/bench_redirect_performance.rs index af29c29c4c7f..11996ba4fd19 100644 --- a/third_party/rust/adblock/v0_7/crate/benches/bench_redirect_performance.rs +++ b/third_party/rust/adblock/v0_8/crate/benches/bench_redirect_performance.rs @@ -1,13 +1,10 @@ -#[cfg(feature = "embedded-domain-resolver")] -use addr::{parser::DomainName, psl::List}; use criterion::*; use tokio::runtime::Runtime; use adblock::blocker::{Blocker, BlockerOptions}; use adblock::filters::network::{NetworkFilter, NetworkFilterMask}; use adblock::request::Request; -#[cfg(feature = "resource-assembler")] -use adblock::resources::resource_assembler::assemble_web_accessible_resources; +use adblock::resources::ResourceStorage; const DEFAULT_LISTS_URL: &str = "https://raw.githubusercontent.com/brave/adblock-resources/master/filter_lists/default.json"; @@ -16,21 +13,27 @@ async fn get_all_filters() -> Vec { use futures::FutureExt; #[derive(serde::Serialize, serde::Deserialize)] - struct ListDescriptor { + struct ComponentDescriptor { + sources: Vec, + } + + #[derive(serde::Serialize, serde::Deserialize)] + struct SourceDescriptor { url: String, } - let default_lists = reqwest::get(DEFAULT_LISTS_URL) + let default_components = reqwest::get(DEFAULT_LISTS_URL) .then(|resp| resp.expect("Could not get default filter listing").text()) .map(|text| { - serde_json::from_str::>( + serde_json::from_str::>( &text.expect("Could not get default filter listing as text"), ) .expect("Could not parse default filter listing JSON") }) .await; - let filters_fut: Vec<_> = default_lists + let filters_fut: Vec<_> = default_components[0] + .sources .iter() .map(|list| { reqwest::get(&list.url) @@ -61,13 +64,10 @@ fn 
get_redirect_rules() -> Vec { network_filters .into_iter() + .filter(NetworkFilter::is_redirect) + .filter(NetworkFilter::also_block_redirect) .filter(|rule| { - if let Some(ref redirect) = rule.modifier_option { - if redirect != "none" { - return true; - } - } - false + rule.modifier_option.as_ref().unwrap() != "none" }) .enumerate() .map(|(index, mut rule)| { @@ -92,32 +92,66 @@ fn get_preloaded_blocker(rules: Vec) -> Blocker { enable_optimizations: true, }; - #[cfg(not(feature = "resource-assembler"))] let blocker = Blocker::new(rules, &blocker_options); + blocker +} + +fn build_resources_for_filters(#[allow(unused)] filters: &[NetworkFilter]) -> ResourceStorage { + let mut resources = ResourceStorage::default(); + #[cfg(feature = "resource-assembler")] - let blocker = { + { use std::path::Path; + use adblock::resources::resource_assembler::assemble_web_accessible_resources; - let mut blocker = Blocker::new(rules, &blocker_options); - - let mut resources = assemble_web_accessible_resources( + let mut resource_data = assemble_web_accessible_resources( Path::new("data/test/fake-uBO-files/web_accessible_resources"), Path::new("data/test/fake-uBO-files/redirect-resources.js"), ); #[allow(deprecated)] - resources.append( + resource_data.append( &mut adblock::resources::resource_assembler::assemble_scriptlet_resources(Path::new( "data/test/fake-uBO-files/scriptlets.js", )), ); - blocker.use_resources(&resources); + resource_data + .into_iter() + .for_each(|resource| { + let _res = resources.add_resource(resource); + }); + } - blocker - }; + #[cfg(not(feature = "resource-assembler"))] + { + use adblock::resources::{Resource, ResourceType, MimeType}; + + filters + .iter() + .filter(|f| f.is_redirect()) + .map(|f| { + let mut redirect = f.modifier_option.as_ref().unwrap().as_str(); + // strip priority, if present + if let Some(i) = redirect.rfind(':') { + redirect = &redirect[0..i]; + } - blocker + Resource { + name: redirect.to_owned(), + aliases: vec![], + kind: 
ResourceType::Mime(MimeType::from_extension(&redirect)), + content: base64::encode(redirect), + dependencies: vec![], + permission: Default::default(), + } + }) + .for_each(|resource| { + let _res = resources.add_resource(resource); + }); + } + + resources } /// Maps network filter rules into `Request`s that would trigger those rules @@ -158,57 +192,40 @@ pub fn build_custom_requests(rules: Vec) -> Vec { let domain = &rule_hostname[..rule_hostname.find('/').unwrap()]; let hostname = domain; - #[allow(unused)] let raw_line = rule.raw_line.clone().unwrap(); - let (source_hostname, source_domain) = if rule.opt_domains.is_some() { - #[cfg(not(feature = "embedded-domain-resolver"))] - { - panic!("this test requires the `embedded-domain-resolver` feature"); - } - #[cfg(feature = "embedded-domain-resolver")] - { - let domain_start = raw_line.rfind("domain=").unwrap() + "domain=".len(); - let from_start = &raw_line[domain_start..]; - let domain_end = from_start - .find('|') - .or_else(|| from_start.find(",")) - .or_else(|| Some(from_start.len())) - .unwrap() - + domain_start; - let source_hostname = &raw_line[domain_start..domain_end]; - - let domain = List.parse_domain_name(source_hostname).unwrap(); - let suffix = domain.suffix(); - let domain_start = - source_hostname[..source_hostname.len() - suffix.len() - 1].rfind('.'); - let source_domain = if let Some(domain_start) = domain_start { - &source_hostname[domain_start + 1..] 
- } else { - source_hostname - }; - (source_hostname, source_domain) - } + let source_hostname = if rule.opt_domains.is_some() { + let domain_start = raw_line.rfind("domain=").unwrap() + "domain=".len(); + let from_start = &raw_line[domain_start..]; + let domain_end = from_start + .find('|') + .or_else(|| from_start.find(",")) + .or_else(|| Some(from_start.len())) + .unwrap() + + domain_start; + let source_hostname = &raw_line[domain_start..domain_end]; + + source_hostname + } else if rule.mask.contains(NetworkFilterMask::THIRD_PARTY) { + "always-third-party.com" } else { - (hostname, domain) + hostname }; + let source_url = format!("https://{}", source_hostname); + Request::new( - raw_type, &url, - "https", - hostname, - domain, - source_hostname, - source_domain, - ) + &source_url, + raw_type, + ).unwrap() }) .collect::>() } -fn bench_fn(blocker: &Blocker, requests: &[Request]) { +fn bench_fn(blocker: &Blocker, resources: &ResourceStorage, requests: &[Request]) { requests.iter().for_each(|request| { - let block_result = blocker.check(&request); - assert!(block_result.redirect.is_some()); + let block_result = blocker.check(&request, &resources); + assert!(block_result.redirect.is_some(), "{:?}, {:?}", request, block_result); }); } @@ -218,6 +235,7 @@ fn redirect_performance(c: &mut Criterion) { let rules = get_redirect_rules(); let blocker = get_preloaded_blocker(rules.clone()); + let resources = build_resources_for_filters(&rules); let requests = build_custom_requests(rules.clone()); let requests_len = requests.len() as u64; @@ -225,7 +243,7 @@ fn redirect_performance(c: &mut Criterion) { group.sample_size(10); group.bench_function("without_alias_lookup", move |b| { - b.iter(|| bench_fn(&blocker, &requests)) + b.iter(|| bench_fn(&blocker, &resources, &requests)) }); group.finish(); diff --git a/third_party/rust/adblock/v0_7/crate/benches/bench_regex.rs b/third_party/rust/adblock/v0_8/crate/benches/bench_regex.rs similarity index 100% rename from 
third_party/rust/adblock/v0_7/crate/benches/bench_regex.rs rename to third_party/rust/adblock/v0_8/crate/benches/bench_regex.rs diff --git a/third_party/rust/adblock/v0_7/crate/benches/bench_rules.rs b/third_party/rust/adblock/v0_8/crate/benches/bench_rules.rs similarity index 77% rename from third_party/rust/adblock/v0_7/crate/benches/bench_rules.rs rename to third_party/rust/adblock/v0_8/crate/benches/bench_rules.rs index 55fd4843f04e..82436002ce0f 100644 --- a/third_party/rust/adblock/v0_7/crate/benches/bench_rules.rs +++ b/third_party/rust/adblock/v0_8/crate/benches/bench_rules.rs @@ -2,15 +2,16 @@ use criterion::*; use once_cell::sync::Lazy; use adblock::blocker::{Blocker, BlockerOptions}; -use adblock::utils::{read_file_lines, rules_from_lists}; + +#[path = "../tests/test_utils.rs"] +mod test_utils; +use test_utils::rules_from_lists; static DEFAULT_LISTS: Lazy> = Lazy::new(|| { - rules_from_lists(&vec![String::from( + rules_from_lists(&[ "data/easylist.to/easylist/easylist.txt", - )]) + ]).collect() }); -static DEFAULT_RULES_LISTS: Lazy>> = - Lazy::new(|| vec![read_file_lines("data/easylist.to/easylist/easylist.txt")]); fn bench_string_hashing(filters: &Vec) -> adblock::utils::Hash { let mut dummy: adblock::utils::Hash = 0; @@ -52,11 +53,11 @@ fn string_tokenize(c: &mut Criterion) { group.finish(); } -fn bench_parsing_impl(lists: &Vec>) -> usize { +fn bench_parsing_impl(lists: &Vec<&Vec>) -> usize { let mut dummy = 0; for list in lists { - let (network_filters, _) = adblock::lists::parse_filters(list, false, Default::default()); + let (network_filters, _) = adblock::lists::parse_filters(*list, false, Default::default()); dummy = dummy + network_filters.len() % 1000000; } @@ -70,17 +71,17 @@ fn list_parse(c: &mut Criterion) { group.sample_size(10); group.bench_function("network filters", |b| { - b.iter(|| bench_parsing_impl(&DEFAULT_RULES_LISTS)) + b.iter(|| bench_parsing_impl(&vec![DEFAULT_LISTS.as_ref()])) }); group.bench_function("all filters", |b| { - 
b.iter(|| bench_parsing_impl(&DEFAULT_RULES_LISTS)) + b.iter(|| bench_parsing_impl(&vec![DEFAULT_LISTS.as_ref()])) }); group.finish(); } -fn get_blocker(rules: &Vec) -> Blocker { +fn get_blocker(rules: impl IntoIterator>) -> Blocker { let (network_filters, _) = adblock::lists::parse_filters(rules, false, Default::default()); println!("Got {} network filters", network_filters.len()); @@ -98,10 +99,10 @@ fn blocker_new(c: &mut Criterion) { group.throughput(Throughput::Elements(1)); group.sample_size(10); - let rules = rules_from_lists(&vec![ - String::from("data/easylist.to/easylist/easylist.txt"), - String::from("data/easylist.to/easylist/easyprivacy.txt"), - ]); + let rules: Vec<_> = rules_from_lists(&[ + "data/easylist.to/easylist/easylist.txt", + "data/easylist.to/easylist/easyprivacy.txt", + ]).collect(); group.bench_function("el+ep", move |b| b.iter(|| get_blocker(&rules))); diff --git a/third_party/rust/adblock/v0_7/crate/benches/bench_url.rs b/third_party/rust/adblock/v0_8/crate/benches/bench_url.rs similarity index 57% rename from third_party/rust/adblock/v0_7/crate/benches/bench_url.rs rename to third_party/rust/adblock/v0_8/crate/benches/bench_url.rs index 458b89c01736..a0334208d7c4 100644 --- a/third_party/rust/adblock/v0_7/crate/benches/bench_url.rs +++ b/third_party/rust/adblock/v0_8/crate/benches/bench_url.rs @@ -5,6 +5,10 @@ use serde::{Deserialize, Serialize}; use adblock::request::Request; use adblock::url_parser::parse_url; +#[path = "../tests/test_utils.rs"] +mod test_utils; +use test_utils::rules_from_lists; + #[allow(non_snake_case)] #[derive(Serialize, Deserialize, Clone)] struct TestRequest { @@ -14,7 +18,7 @@ struct TestRequest { } fn load_requests() -> Vec { - adblock::utils::read_file_lines("data/requests.json") + rules_from_lists(&["data/requests.json"]) .into_iter() .map(|r| serde_json::from_str(&r)) .filter_map(Result::ok) @@ -34,7 +38,7 @@ fn request_parsing_throughput(c: &mut Criterion) { b.iter(|| { let mut successful = 0; 
requests.iter().for_each(|r| { - let req: Result = Request::from_urls(&r.url, &r.frameUrl, &r.cpt); + let req: Result = Request::new(&r.url, &r.frameUrl, &r.cpt); if req.is_ok() { successful += 1; } @@ -80,51 +84,11 @@ fn request_new_throughput(c: &mut Criterion) { group.throughput(Throughput::Elements(requests_len)); group.sample_size(10); - let requests_parsed: Vec<_> = requests - .iter() - .map(|r| { - let url_norm = r.url.to_ascii_lowercase(); - let source_url_norm = r.frameUrl.to_ascii_lowercase(); - - let maybe_parsed_url = parse_url(&url_norm); - if maybe_parsed_url.is_none() { - return Err("bad url"); - } - let parsed_url = maybe_parsed_url.unwrap(); - - let maybe_parsed_source = parse_url(&source_url_norm); - - if maybe_parsed_source.is_none() { - Ok(( - r.cpt.clone(), - parsed_url.url.clone(), - String::from(parsed_url.schema()), - String::from(parsed_url.hostname()), - String::from(parsed_url.domain()), - String::from(""), - String::from(""), - )) - } else { - let parsed_source = maybe_parsed_source.unwrap(); - Ok(( - r.cpt.clone(), - parsed_url.url.clone(), - String::from(parsed_url.schema()), - String::from(parsed_url.hostname()), - String::from(parsed_url.domain()), - String::from(parsed_source.hostname()), - parsed_source.domain().to_owned(), - )) - } - }) - .filter_map(Result::ok) - .collect(); - group.bench_function("new", move |b| { b.iter(|| { let mut successful = 0; - requests_parsed.iter().for_each(|r| { - Request::new(&r.0, &r.1, &r.2, &r.3, &r.4, &r.5, &r.6); + requests.iter().for_each(|r| { + Request::new(&r.url, &r.frameUrl, &r.cpt).ok(); successful += 1; }); }) diff --git a/third_party/rust/adblock/v0_7/crate/rust-toolchain.toml b/third_party/rust/adblock/v0_8/crate/rust-toolchain.toml similarity index 100% rename from third_party/rust/adblock/v0_7/crate/rust-toolchain.toml rename to third_party/rust/adblock/v0_8/crate/rust-toolchain.toml diff --git a/third_party/rust/adblock/v0_7/crate/src/blocker.rs 
b/third_party/rust/adblock/v0_8/crate/src/blocker.rs similarity index 72% rename from third_party/rust/adblock/v0_7/crate/src/blocker.rs rename to third_party/rust/adblock/v0_8/crate/src/blocker.rs index b6f936dea44f..338eeacb2235 100644 --- a/third_party/rust/adblock/v0_7/crate/src/blocker.rs +++ b/third_party/rust/adblock/v0_8/crate/src/blocker.rs @@ -12,29 +12,31 @@ use thiserror::Error; use lifeguard::Pool; use crate::filters::network::{NetworkFilter, NetworkMatchable}; -use crate::regex_manager::{RegexManager, RegexDebugEntry, RegexManagerDiscardPolicy}; +use crate::regex_manager::{RegexManager, RegexManagerDiscardPolicy}; use crate::request::Request; use crate::utils::{fast_hash, Hash}; use crate::optimizer; -use crate::resources::{Resource, RedirectResourceStorage, RedirectResource}; +use crate::resources::ResourceStorage; use crate::utils; +/// Options used when constructing a [`Blocker`]. pub struct BlockerOptions { pub enable_optimizations: bool, } +/// Describes how a particular network request should be handled. #[derive(Debug, Serialize)] pub struct BlockerResult { + /// Was a blocking filter matched for this request? pub matched: bool, /// Important is used to signal that a rule with the `important` option /// matched. An `important` match means that exceptions should not apply /// and no further checking is neccesary--the request should be blocked /// (empty body or cancelled). /// - /// Brave Browser keeps seperate instances of [`Blocker`] for default - /// lists and regional ones, so `important` here is used to correct - /// behaviour between them: checking should stop instead of moving to the - /// next instance iff an `important` rule matched. + /// Brave Browser keeps multiple instances of [`Blocker`], so `important` + /// here is used to correct behaviour between them: checking should stop + /// instead of moving to the next instance iff an `important` rule matched. 
pub important: bool, /// Specifies what to load instead of the original request, rather than /// just blocking it outright. This can come from a filter with a `redirect` @@ -59,10 +61,6 @@ pub struct BlockerResult { /// the rule when there is a match and debugging is enabled. Otherwise, on /// a match, it is `Some`. pub filter: Option, - /// The `error` field is only used to signal that there was an error in - /// parsing the provided URLs when using the simpler - /// [`crate::engine::Engine::check_network_urls`] method. - pub error: Option, } impl Default for BlockerResult { @@ -74,32 +72,21 @@ impl Default for BlockerResult { rewritten_url: None, exception: None, filter: None, - error: None, } } } +/// Possible errors when adding a filter to a [`Blocker`]. #[derive(Debug, Error, PartialEq)] pub enum BlockerError { - #[error("serialization failed")] - SerializationError, - #[error("deserialization failed")] - DeserializationError, - #[error("optimized filter existence")] - OptimizedFilterExistence, #[error("$badfilter cannot be added (unsupported)")] BadFilterAddUnsupported, #[error("filter already exists")] FilterExists, } -pub struct BlockerDebugInfo { - pub regex_data: Vec, - pub compiled_regex_count: usize, -} - #[cfg(feature = "object-pooling")] -pub struct TokenPool { +pub(crate) struct TokenPool { pub pool: Pool> } @@ -137,7 +124,6 @@ pub struct Blocker { pub(crate) enable_optimizations: bool, - pub(crate) resources: RedirectResourceStorage, // Not serialized #[cfg(feature = "object-pooling")] pub(crate) pool: TokenPool, @@ -152,8 +138,8 @@ pub struct Blocker { impl Blocker { /// Decide if a network request (usually from WebRequest API) should be /// blocked, redirected or allowed. 
- pub fn check(&self, request: &Request) -> BlockerResult { - self.check_parameterised(request, false, false) + pub fn check(&self, request: &Request, resources: &ResourceStorage) -> BlockerResult { + self.check_parameterised(request, resources, false, false) } #[cfg(feature = "unsync-regex-caching")] @@ -200,6 +186,7 @@ impl Blocker { pub fn check_parameterised( &self, request: &Request, + resources: &ResourceStorage, matched_rule: bool, force_check_exceptions: bool, ) -> BlockerResult { @@ -225,8 +212,6 @@ impl Blocker { // 3. normal filters - if no match by then // 4. exceptions - if any non-important match of forced - #[cfg(feature = "metrics")] - print!("importants\t"); // Always check important filters let important_filter = self.importants.check( request, @@ -237,8 +222,6 @@ impl Blocker { // only check the rest of the rules if not previously matched let filter = if important_filter.is_none() && !matched_rule { - #[cfg(feature = "metrics")] - print!("tagged\t"); self.filters_tagged .check( request, @@ -247,8 +230,6 @@ impl Blocker { &mut regex_manager, ) .or_else(|| { - #[cfg(feature = "metrics")] - print!("filters\t"); self.filters.check( request, &request_tokens, @@ -263,8 +244,6 @@ impl Blocker { let exception = match filter.as_ref() { // if no other rule matches, only check exceptions if forced to None if matched_rule || force_check_exceptions => { - #[cfg(feature = "metrics")] - print!("exceptions\t"); self.exceptions.check( request, &request_tokens, @@ -276,8 +255,6 @@ impl Blocker { // If matched an important filter, exceptions don't atter Some(f) if f.is_important() => None, Some(_) => { - #[cfg(feature = "metrics")] - print!("exceptions\t"); self.exceptions.check( request, &request_tokens, @@ -287,9 +264,6 @@ impl Blocker { } }; - #[cfg(feature = "metrics")] - println!(); - let redirect_filters = self.redirects.check_all( request, &request_tokens, @@ -298,9 +272,8 @@ impl Blocker { ); // Extract the highest priority redirect directive. 
- // So far, priority specifiers are not supported, which means: // 1. Exceptions - can bail immediately if found - // 2. Any other redirect resource + // 2. Find highest priority non-exception redirect let redirect_resource = { let mut exceptions = vec![]; for redirect_filter in redirect_filters.iter() { @@ -342,17 +315,13 @@ impl Blocker { }; let redirect: Option = redirect_resource.and_then(|resource_name| { - if let Some(resource) = self.resources.get_resource(resource_name) { - // Only match resource redirects if a matching resource exists - let data_url = format!("data:{};base64,{}", resource.content_type, &resource.data); - Some(data_url.trim().to_owned()) - } else { + resources.get_redirect_resource(resource_name).or_else(|| { // It's acceptable to pass no redirection if no matching resource is loaded. // TODO - it may be useful to return a status flag to indicate that this occurred. #[cfg(test)] eprintln!("Matched rule with redirect option but did not find corresponding resource to send"); None - } + }) }); let important = filter.is_some() && filter.as_ref().map(|f| f.is_important()).unwrap_or_else(|| false); @@ -377,7 +346,6 @@ impl Blocker { rewritten_url, exception: exception.as_ref().map(|f| f.to_string()), // copy the exception filter: filter.as_ref().map(|f| f.to_string()), // copy the filter - error: None, } } @@ -428,7 +396,7 @@ impl Blocker { .map(|param| (param, true)) .collect(); - let filters = removeparam_filters.check_all(request, &request_tokens, &NO_TAGS, regex_manager); + let filters = removeparam_filters.check_all(request, request_tokens, &NO_TAGS, regex_manager); let mut rewrite = false; for removeparam_filter in filters { if let Some(removeparam) = &removeparam_filter.modifier_option { @@ -612,7 +580,6 @@ impl Blocker { // Options enable_optimizations: options.enable_optimizations, - resources: RedirectResourceStorage::default(), #[cfg(feature = "object-pooling")] pool: TokenPool::default(), regex_manager: Default::default(), @@ -634,6 
+601,8 @@ impl Blocker { self.generic_hide.optimize(); } + /// Has this exact filter already been added? Note that this is a best-effort method and may + /// miss some filters, especially if optimizations are enabled. pub fn filter_exists(&self, filter: &NetworkFilter) -> bool { if filter.is_csp() { self.csp.filter_exists(filter) @@ -654,6 +623,9 @@ impl Blocker { } } + /// Add a single filter to this [`Blocker`]. + /// + /// Filter optimization is skipped when using this method. pub fn add_filter(&mut self, filter: NetworkFilter) -> Result<(), BlockerError> { // Redirects are independent of blocking behavior. if filter.is_redirect() { @@ -727,19 +699,6 @@ impl Blocker { self.tags_enabled.iter().cloned().collect() } - pub fn use_resources(&mut self, resources: &[Resource]) { - let resources = RedirectResourceStorage::from_resources(resources); - self.resources = resources; - } - - pub fn add_resource(&mut self, resource: &Resource) -> Result<(), crate::resources::AddResourceError> { - self.resources.add_resource(resource) - } - - pub fn get_resource(&self, key: &str) -> Option<&RedirectResource> { - self.resources.get_resource(key) - } - pub fn set_regex_discard_policy( &self, new_discard_policy: RegexManagerDiscardPolicy @@ -748,25 +707,21 @@ impl Blocker { regex_manager.set_discard_policy(new_discard_policy); } - #[cfg(feature = "debug-info")] + #[cfg(feature = "regex-debug-info")] pub fn discard_regex(&self, regex_id: u64) { let mut regex_manager = self.borrow_regex_manager(); regex_manager.discard_regex(regex_id); } - #[cfg(feature = "debug-info")] - pub fn get_debug_info(&self) -> BlockerDebugInfo { + #[cfg(feature = "regex-debug-info")] + pub fn get_regex_debug_info(&self) -> crate::regex_manager::RegexDebugInfo { let regex_manager = self.borrow_regex_manager(); - BlockerDebugInfo { - regex_data: regex_manager.get_debug_regex_data(), - compiled_regex_count: regex_manager.get_compiled_regex_count(), - } + regex_manager.get_debug_info() } - } 
#[derive(Serialize, Deserialize, Default)] -pub struct NetworkFilterList { +pub(crate) struct NetworkFilterList { #[serde(serialize_with = "crate::data_format::utils::stabilize_hashmap_serialization")] pub(crate) filter_map: HashMap>>, } @@ -877,10 +832,8 @@ impl NetworkFilterList { } } + /// This may not work if the list has been optimized. pub fn filter_exists(&self, filter: &NetworkFilter) -> bool { - // if self.optimized == Some(true) { - // return Err(BlockerError::OptimizedFilterExistence) - // } let mut tokens: Vec<_> = filter.get_tokens().into_iter().flatten().collect(); if tokens.is_empty() { @@ -912,31 +865,14 @@ impl NetworkFilterList { active_tags: &HashSet, regex_manager: &mut RegexManager, ) -> Option<&NetworkFilter> { - #[cfg(feature = "metrics")] - let mut filters_checked = 0; - #[cfg(feature = "metrics")] - let mut filter_buckets = 0; - - #[cfg(not(feature = "metrics"))] - { - if self.filter_map.is_empty() { - return None; - } + if self.filter_map.is_empty() { + return None; } if let Some(source_hostname_hashes) = request.source_hostname_hashes.as_ref() { for token in source_hostname_hashes { if let Some(filter_bucket) = self.filter_map.get(token) { - #[cfg(feature = "metrics")] - { - filter_buckets += 1; - } - for filter in filter_bucket { - #[cfg(feature = "metrics")] - { - filters_checked += 1; - } // if matched, also needs to be tagged with an active tag (or not tagged at all) if filter.matches(request, regex_manager) && filter @@ -945,8 +881,6 @@ impl NetworkFilterList { .map(|t| active_tags.contains(t)) .unwrap_or(true) { - #[cfg(feature = "metrics")] - print!("true\t{}\t{}\tskipped\t{}\t{}\t", filter_buckets, filters_checked, filter_buckets, filters_checked); return Some(filter); } } @@ -954,33 +888,17 @@ impl NetworkFilterList { } } - #[cfg(feature = "metrics")] - print!("false\t{}\t{}\t", filter_buckets, filters_checked); - for token in request_tokens { if let Some(filter_bucket) = self.filter_map.get(token) { - #[cfg(feature = "metrics")] 
- { - filter_buckets += 1; - } for filter in filter_bucket { - #[cfg(feature = "metrics")] - { - filters_checked += 1; - } // if matched, also needs to be tagged with an active tag (or not tagged at all) if filter.matches(request, regex_manager) && filter.tag.as_ref().map(|t| active_tags.contains(t)).unwrap_or(true) { - #[cfg(feature = "metrics")] - print!("true\t{}\t{}\t", filter_buckets, filters_checked); return Some(filter); } } } } - #[cfg(feature = "metrics")] - print!("false\t{}\t{}\t", filter_buckets, filters_checked); - None } @@ -995,37 +913,18 @@ impl NetworkFilterList { active_tags: &HashSet, regex_manager: &mut RegexManager, ) -> Vec<&NetworkFilter> { - #[cfg(feature = "metrics")] - let mut filters_checked = 0; - #[cfg(feature = "metrics")] - let mut filter_buckets = 0; - let mut filters: Vec<&NetworkFilter> = vec![]; - #[cfg(not(feature = "metrics"))] - { - if self.filter_map.is_empty() { - return filters; - } + if self.filter_map.is_empty() { + return filters; } if let Some(source_hostname_hashes) = request.source_hostname_hashes.as_ref() { for token in source_hostname_hashes { if let Some(filter_bucket) = self.filter_map.get(token) { - #[cfg(feature = "metrics")] - { - filter_buckets += 1; - } - for filter in filter_bucket { - #[cfg(feature = "metrics")] - { - filters_checked += 1; - } // if matched, also needs to be tagged with an active tag (or not tagged at all) if filter.matches(request, regex_manager) && filter.tag.as_ref().map(|t| active_tags.contains(t)).unwrap_or(true) { - #[cfg(feature = "metrics")] - print!("true\t{}\t{}\tskipped\t{}\t{}\t", filter_buckets, filters_checked, filter_buckets, filters_checked); filters.push(filter); } } @@ -1033,33 +932,17 @@ impl NetworkFilterList { } } - #[cfg(feature = "metrics")] - print!("false\t{}\t{}\t", filter_buckets, filters_checked); - for token in request_tokens { if let Some(filter_bucket) = self.filter_map.get(token) { - #[cfg(feature = "metrics")] - { - filter_buckets += 1; - } for filter in 
filter_bucket { - #[cfg(feature = "metrics")] - { - filters_checked += 1; - } // if matched, also needs to be tagged with an active tag (or not tagged at all) if filter.matches(request, regex_manager) && filter.tag.as_ref().map(|t| active_tags.contains(t)).unwrap_or(true) { - #[cfg(feature = "metrics")] - print!("true\t{}\t{}\t", filter_buckets, filters_checked); filters.push(filter); } } } } - #[cfg(feature = "metrics")] - print!("false\t{}\t{}\t", filter_buckets, filters_checked); - filters } } @@ -1181,7 +1064,7 @@ mod tests { #[test] fn network_filter_list_new_works() { { - let filters = vec!["||foo.com"]; + let filters = ["||foo.com"]; let network_filters: Vec<_> = filters .into_iter() .map(|f| NetworkFilter::parse(&f, true, Default::default())) @@ -1193,7 +1076,7 @@ mod tests { } // choses least frequent token { - let filters = vec!["||foo.com", "||bar.com/foo"]; + let filters = ["||foo.com", "||bar.com/foo"]; let network_filters: Vec<_> = filters .into_iter() .map(|f| NetworkFilter::parse(&f, true, Default::default())) @@ -1211,7 +1094,7 @@ mod tests { } // choses blacklisted token when no other choice { - let filters = vec!["||foo.com", "||foo.com/bar", "||www"]; + let filters = ["||foo.com", "||foo.com/bar", "||www"]; let network_filters: Vec<_> = filters .into_iter() .map(|f| NetworkFilter::parse(&f, true, Default::default())) @@ -1230,7 +1113,7 @@ mod tests { } // uses domain as token when only one domain { - let filters = vec!["||foo.com", "||foo.com$domain=bar.com"]; + let filters = ["||foo.com", "||foo.com$domain=bar.com"]; let network_filters: Vec<_> = filters .into_iter() .map(|f| NetworkFilter::parse(&f, true, Default::default())) @@ -1253,7 +1136,7 @@ mod tests { } // dispatches filter to multiple buckets per domain options if no token in main part { - let filters = vec!["foo*$domain=bar.com|baz.com"]; + let filters = ["foo*$domain=bar.com|baz.com"]; let network_filters: Vec<_> = filters .into_iter() .map(|f| NetworkFilter::parse(&f, true, 
Default::default())) @@ -1290,10 +1173,10 @@ mod tests { } } - fn test_requests_filters(filters: &Vec<&str>, requests: &Vec<(Request, bool)>) { + fn test_requests_filters(filters: impl IntoIterator>, requests: &[(Request, bool)]) { let network_filters: Vec<_> = filters .into_iter() - .map(|f| NetworkFilter::parse(&f, true, Default::default())) + .map(|f| NetworkFilter::parse(&f.as_ref(), true, Default::default())) .filter_map(Result::ok) .collect(); let filter_list = NetworkFilterList::new(network_filters, false); @@ -1315,7 +1198,7 @@ mod tests { #[test] fn network_filter_list_check_works_plain_filter() { // includes cases with fall back to 0 bucket (no tokens from a rule) - let filters = vec![ + let filters = [ "foo", "-foo-", "&fo.o=+_-", @@ -1324,7 +1207,7 @@ mod tests { "https://bar.com/bar/baz", ]; - let url_results = vec![ + let url_results = [ ("https://bar.com/foo", true), ("https://bar.com/baz/foo", true), ("https://bar.com/q=foo/baz", true), @@ -1338,7 +1221,7 @@ mod tests { let request_expectations: Vec<_> = url_results .into_iter() .map(|(url, expected_result)| { - let request = Request::from_url(url).unwrap(); + let request = Request::new(url, "https://example.com", "other").unwrap(); (request, expected_result) }) .collect(); @@ -1348,7 +1231,7 @@ mod tests { #[test] fn network_filter_list_check_works_hostname_anchor() { - let filters = vec![ + let filters = [ "||foo.com", "||bar.com/bar", "||coo.baz.", @@ -1356,7 +1239,7 @@ mod tests { "||foo.baz^", ]; - let url_results = vec![ + let url_results = [ ("https://foo.com/bar", true), ("https://bar.com/bar", true), ("https://baz.com/bar", false), @@ -1373,7 +1256,7 @@ mod tests { let request_expectations: Vec<_> = url_results .into_iter() .map(|(url, expected_result)| { - let request = Request::from_url(url).unwrap(); + let request = Request::new(url, "https://example.com", "other").unwrap(); (request, expected_result) }) .collect(); @@ -1383,61 +1266,45 @@ mod tests { #[test] fn 
network_filter_list_check_works_unicode() { - let filters = vec![ + let filters = [ "||firstrowsports.li/frame/", "||fırstrowsports.eu/pu/", "||atÄ‘he.net/pu/", ]; - let url_results = vec![ - ( - Request::from_url("https://firstrowsports.li/frame/bar").unwrap(), - true, - ), - ( - Request::from_url("https://secondrowsports.li/frame/bar").unwrap(), - false, - ), - ( - Request::from_url("https://fırstrowsports.eu/pu/foo").unwrap(), - true, - ), - ( - Request::from_url("https://xn--frstrowsports-39b.eu/pu/foo").unwrap(), - true, - ), - ( - Request::from_url("https://atÄ‘he.net/pu/foo").unwrap(), - true, - ), - ( - Request::from_url("https://xn--athe-1ua.net/pu/foo").unwrap(), - true, - ), + let url_results = [ + ("https://firstrowsports.li/frame/bar", true), + ("https://secondrowsports.li/frame/bar", false), + ("https://fırstrowsports.eu/pu/foo", true), + ("https://xn--frstrowsports-39b.eu/pu/foo", true), + ("https://atÄ‘he.net/pu/foo", true), + ("https://xn--athe-1ua.net/pu/foo", true), ]; let request_expectations: Vec<_> = url_results .into_iter() - .map(|(request, expected_result)| (request, expected_result)) - .collect(); + .map(|(url, expected_result)| { + let request = Request::new(url, "https://example.com", "other").unwrap(); + (request, expected_result) + }).collect(); test_requests_filters(&filters, &request_expectations); } #[test] fn network_filter_list_check_works_regex_escaping() { - let filters = vec![ + let filters = [ r#"/^https?:\/\/.*(bitly|bit)\.(com|ly)\/.*/$domain=123movies.com|1337x.to"#, r#"/\:\/\/data.*\.com\/[a-zA-Z0-9]{30,}/$third-party,xmlhttprequest"# ]; - let url_results = vec![ + let url_results = [ ( - Request::from_urls("https://bit.ly/bar/", "http://123movies.com", "").unwrap(), + Request::new("https://bit.ly/bar/", "http://123movies.com", "").unwrap(), true, ), ( - Request::from_urls( + Request::new( "https://data.foo.com/9VjjrjU9Or2aqkb8PDiqTBnULPgeI48WmYEHkYer", "http://123movies.com", "xmlhttprequest", @@ -1461,17 +1328,18 @@ mod 
blocker_tests { use super::*; use crate::lists::parse_filters; + use crate::resources::Resource; use crate::request::Request; use std::collections::HashSet; use std::iter::FromIterator; #[test] fn single_slash() { - let filters = vec![ - String::from("/|"), + let filters = [ + "/|", ]; - let (network_filters, _) = parse_filters(&filters, true, Default::default()); + let (network_filters, _) = parse_filters(filters, true, Default::default()); let blocker_options = BlockerOptions { enable_optimizations: true, @@ -1479,14 +1347,14 @@ mod blocker_tests { let blocker = Blocker::new(network_filters, &blocker_options); - let request = Request::from_urls("https://example.com/test/", "https://example.com", "xmlhttprequest").unwrap(); - assert!(blocker.check(&request).matched); + let request = Request::new("https://example.com/test/", "https://example.com", "xmlhttprequest").unwrap(); + assert!(blocker.check(&request, &Default::default()).matched); - let request = Request::from_urls("https://example.com/test", "https://example.com", "xmlhttprequest").unwrap(); - assert!(!blocker.check(&request).matched); + let request = Request::new("https://example.com/test", "https://example.com", "xmlhttprequest").unwrap(); + assert!(!blocker.check(&request, &Default::default()).matched); } - fn test_requests_filters(filters: &[String], requests: &[(Request, bool)]) { + fn test_requests_filters(filters: impl IntoIterator>, requests: &[(Request, bool)]) { let (network_filters, _) = parse_filters(filters, true, Default::default()); let blocker_options: BlockerOptions = BlockerOptions { @@ -1496,7 +1364,7 @@ mod blocker_tests { let blocker = Blocker::new(network_filters, &blocker_options); requests.iter().for_each(|(req, expected_result)| { - let matched_rule = blocker.check(&req); + let matched_rule = blocker.check(&req, &Default::default()); if *expected_result { assert!(matched_rule.matched, "Expected match for {}", req.url); } else { @@ -1507,12 +1375,12 @@ mod blocker_tests { #[test] fn 
redirect_blocking_exception() { - let filters = vec![ - String::from("||imdb-video.media-imdb.com$media,redirect=noop-0.1s.mp3"), - String::from("@@||imdb-video.media-imdb.com^$domain=imdb.com"), + let filters = [ + "||imdb-video.media-imdb.com$media,redirect=noop-0.1s.mp3", + "@@||imdb-video.media-imdb.com^$domain=imdb.com", ]; - let request = Request::from_urls("https://imdb-video.media-imdb.com/kBOeI88k1o23eNAi", "https://www.imdb.com/video/13", "media").unwrap(); + let request = Request::new("https://imdb-video.media-imdb.com/kBOeI88k1o23eNAi", "https://www.imdb.com/video/13", "media").unwrap(); let (network_filters, _) = parse_filters(&filters, true, Default::default()); @@ -1520,31 +1388,28 @@ mod blocker_tests { enable_optimizations: false, }; - let mut blocker = Blocker::new(network_filters, &blocker_options); + let blocker = Blocker::new(network_filters, &blocker_options); + let mut resources = ResourceStorage::default(); - blocker.add_resource(&Resource { - name: "noop-0.1s.mp3".to_string(), - aliases: vec![], - kind: crate::resources::ResourceType::Mime(crate::resources::MimeType::AudioMp3), - content: base64::encode("mp3"), - }).unwrap(); + resources.add_resource( + Resource::simple("noop-0.1s.mp3", crate::resources::MimeType::AudioMp3, "mp3"), + ).unwrap(); - let matched_rule = blocker.check(&request); + let matched_rule = blocker.check(&request, &resources); assert_eq!(matched_rule.matched, false); assert_eq!(matched_rule.important, false); assert_eq!(matched_rule.redirect, Some("data:audio/mp3;base64,bXAz".to_string())); assert_eq!(matched_rule.exception, Some("@@||imdb-video.media-imdb.com^$domain=imdb.com".to_string())); - assert_eq!(matched_rule.error, None); } #[test] fn redirect_exception() { - let filters = vec![ - String::from("||imdb-video.media-imdb.com$media,redirect=noop-0.1s.mp3"), - String::from("@@||imdb-video.media-imdb.com^$domain=imdb.com,redirect=noop-0.1s.mp3"), + let filters = [ + 
"||imdb-video.media-imdb.com$media,redirect=noop-0.1s.mp3", + "@@||imdb-video.media-imdb.com^$domain=imdb.com,redirect=noop-0.1s.mp3", ]; - let request = Request::from_urls("https://imdb-video.media-imdb.com/kBOeI88k1o23eNAi", "https://www.imdb.com/video/13", "media").unwrap(); + let request = Request::new("https://imdb-video.media-imdb.com/kBOeI88k1o23eNAi", "https://www.imdb.com/video/13", "media").unwrap(); let (network_filters, _) = parse_filters(&filters, true, Default::default()); @@ -1552,31 +1417,28 @@ mod blocker_tests { enable_optimizations: false, }; - let mut blocker = Blocker::new(network_filters, &blocker_options); + let blocker = Blocker::new(network_filters, &blocker_options); + let mut resources = ResourceStorage::default(); - blocker.add_resource(&Resource { - name: "noop-0.1s.mp3".to_string(), - aliases: vec![], - kind: crate::resources::ResourceType::Mime(crate::resources::MimeType::AudioMp3), - content: base64::encode("mp3"), - }).unwrap(); + resources.add_resource( + Resource::simple("noop-0.1s.mp3", crate::resources::MimeType::AudioMp3, "mp3"), + ).unwrap(); - let matched_rule = blocker.check(&request); + let matched_rule = blocker.check(&request, &resources); assert_eq!(matched_rule.matched, false); assert_eq!(matched_rule.important, false); assert_eq!(matched_rule.redirect, None); assert_eq!(matched_rule.exception, Some("@@||imdb-video.media-imdb.com^$domain=imdb.com,redirect=noop-0.1s.mp3".to_string())); - assert_eq!(matched_rule.error, None); } #[test] fn redirect_rule_redirection() { - let filters = vec![ - String::from("||doubleclick.net^"), - String::from("||www3.doubleclick.net^$xmlhttprequest,redirect-rule=noop.txt,domain=lineups.fun"), + let filters = [ + "||doubleclick.net^", + "||www3.doubleclick.net^$xmlhttprequest,redirect-rule=noop.txt,domain=lineups.fun", ]; - let request = Request::from_urls("https://www3.doubleclick.net", "https://lineups.fun", "xhr").unwrap(); + let request = Request::new("https://www3.doubleclick.net", 
"https://lineups.fun", "xhr").unwrap(); let (network_filters, _) = parse_filters(&filters, true, Default::default()); @@ -1584,31 +1446,24 @@ mod blocker_tests { enable_optimizations: false, }; - let mut blocker = Blocker::new(network_filters, &blocker_options); + let blocker = Blocker::new(network_filters, &blocker_options); + let mut resources = ResourceStorage::default(); - blocker.add_resource(&Resource { - name: "noop.txt".to_string(), - aliases: vec![], - kind: crate::resources::ResourceType::Mime(crate::resources::MimeType::TextPlain), - content: base64::encode("noop"), - }).unwrap(); + resources.add_resource(Resource::simple("noop.txt", crate::resources::MimeType::TextPlain, "noop")).unwrap(); - let matched_rule = blocker.check(&request); + let matched_rule = blocker.check(&request, &resources); assert_eq!(matched_rule.matched, true); assert_eq!(matched_rule.important, false); assert_eq!(matched_rule.redirect, Some("data:text/plain;base64,bm9vcA==".to_string())); assert_eq!(matched_rule.exception, None); - assert_eq!(matched_rule.error, None); } #[test] fn badfilter_does_not_match() { - let filters = vec![ - String::from("||foo.com$badfilter") - ]; - let url_results = vec![ + let filters = ["||foo.com$badfilter"]; + let url_results = [ ( - Request::from_urls("https://foo.com", "https://bar.com", "image").unwrap(), + Request::new("https://foo.com", "https://bar.com", "image").unwrap(), false, ), ]; @@ -1623,13 +1478,13 @@ mod blocker_tests { #[test] fn badfilter_cancels_with_same_id() { - let filters = vec![ - String::from("||foo.com$domain=bar.com|foo.com,badfilter"), - String::from("||foo.com$domain=foo.com|bar.com") + let filters = [ + "||foo.com$domain=bar.com|foo.com,badfilter", + "||foo.com$domain=foo.com|bar.com", ]; - let url_results = vec![ + let url_results = [ ( - Request::from_urls("https://foo.com", "https://bar.com", "image").unwrap(), + Request::new("https://foo.com", "https://bar.com", "image").unwrap(), false, ), ]; @@ -1644,13 +1499,13 @@ 
mod blocker_tests { #[test] fn badfilter_does_not_cancel_similar_filter() { - let filters = vec![ - String::from("||foo.com$domain=bar.com|foo.com,badfilter"), - String::from("||foo.com$domain=foo.com|bar.com,image") + let filters = [ + "||foo.com$domain=bar.com|foo.com,badfilter", + "||foo.com$domain=foo.com|bar.com,image", ]; - let url_results = vec![ + let url_results = [ ( - Request::from_urls("https://foo.com", "https://bar.com", "image").unwrap(), + Request::new("https://foo.com", "https://bar.com", "image").unwrap(), true, ), ]; @@ -1665,15 +1520,15 @@ mod blocker_tests { #[test] fn hostname_regex_filter_works() { - let filters = vec![ - String::from("||alimc*.top^$domain=letv.com"), - String::from("||aa*.top^$domain=letv.com") + let filters = [ + "||alimc*.top^$domain=letv.com", + "||aa*.top^$domain=letv.com", ]; - let url_results = vec![ - (Request::from_urls("https://r.alimc1.top/test.js", "https://minisite.letv.com/", "script").unwrap(), true), - (Request::from_urls("https://www.baidu.com/test.js", "https://minisite.letv.com/", "script").unwrap(), false), - (Request::from_urls("https://r.aabb.top/test.js", "https://example.com/", "script").unwrap(), false), - (Request::from_urls("https://r.aabb.top/test.js", "https://minisite.letv.com/", "script").unwrap(), true), + let url_results = [ + (Request::new("https://r.alimc1.top/test.js", "https://minisite.letv.com/", "script").unwrap(), true), + (Request::new("https://www.baidu.com/test.js", "https://minisite.letv.com/", "script").unwrap(), false), + (Request::new("https://r.aabb.top/test.js", "https://example.com/", "script").unwrap(), false), + (Request::new("https://r.aabb.top/test.js", "https://minisite.letv.com/", "script").unwrap(), true), ]; let (network_filters, _) = parse_filters(&filters, true, Default::default()); @@ -1683,9 +1538,10 @@ mod blocker_tests { }; let blocker = Blocker::new(network_filters, &blocker_options); + let resources = ResourceStorage::default(); 
url_results.into_iter().for_each(|(req, expected_result)| { - let matched_rule = blocker.check(&req); + let matched_rule = blocker.check(&req, &resources); if expected_result { assert!(matched_rule.matched, "Expected match for {}", req.url); } else { @@ -1696,14 +1552,14 @@ mod blocker_tests { #[test] fn get_csp_directives() { - let filters = vec![ - String::from("$csp=script-src 'self' * 'unsafe-inline',domain=thepiratebay.vip|pirateproxy.live|thehiddenbay.com|downloadpirate.com|thepiratebay10.org|kickass.vip|pirateproxy.app|ukpass.co|prox.icu|pirateproxy.life"), - String::from("$csp=worker-src 'none',domain=pirateproxy.live|thehiddenbay.com|tpb.party|thepiratebay.org|thepiratebay.vip|thepiratebay10.org|flashx.cc|vidoza.co|vidoza.net"), - String::from("||1337x.to^$csp=script-src 'self' 'unsafe-inline'"), - String::from("@@^no-csp^$csp=script-src 'self' 'unsafe-inline'"), - String::from("^duplicated-directive^$csp=worker-src 'none'"), - String::from("@@^disable-all^$csp"), - String::from("^first-party-only^$csp=script-src 'none',1p"), + let filters = [ + "$csp=script-src 'self' * 'unsafe-inline',domain=thepiratebay.vip|pirateproxy.live|thehiddenbay.com|downloadpirate.com|thepiratebay10.org|kickass.vip|pirateproxy.app|ukpass.co|prox.icu|pirateproxy.life", + "$csp=worker-src 'none',domain=pirateproxy.live|thehiddenbay.com|tpb.party|thepiratebay.org|thepiratebay.vip|thepiratebay10.org|flashx.cc|vidoza.co|vidoza.net", + "||1337x.to^$csp=script-src 'self' 'unsafe-inline'", + "@@^no-csp^$csp=script-src 'self' 'unsafe-inline'", + "^duplicated-directive^$csp=worker-src 'none'", + "@@^disable-all^$csp", + "^first-party-only^$csp=script-src 'none',1p", ]; let (network_filters, _) = parse_filters(&filters, true, Default::default()); @@ -1715,48 +1571,48 @@ mod blocker_tests { let blocker = Blocker::new(network_filters, &blocker_options); { // No directives should be returned for requests that are not `document` or `subdocument` content types. 
- assert_eq!(blocker.get_csp_directives(&Request::from_urls("https://pirateproxy.live/static/custom_ads.js", "https://pirateproxy.live", "script").unwrap()), None); - assert_eq!(blocker.get_csp_directives(&Request::from_urls("https://pirateproxy.live/static/custom_ads.js", "https://pirateproxy.live", "image").unwrap()), None); - assert_eq!(blocker.get_csp_directives(&Request::from_urls("https://pirateproxy.live/static/custom_ads.js", "https://pirateproxy.live", "object").unwrap()), None); + assert_eq!(blocker.get_csp_directives(&Request::new("https://pirateproxy.live/static/custom_ads.js", "https://pirateproxy.live", "script").unwrap()), None); + assert_eq!(blocker.get_csp_directives(&Request::new("https://pirateproxy.live/static/custom_ads.js", "https://pirateproxy.live", "image").unwrap()), None); + assert_eq!(blocker.get_csp_directives(&Request::new("https://pirateproxy.live/static/custom_ads.js", "https://pirateproxy.live", "object").unwrap()), None); } { // A single directive should be returned if only one match is present in the engine, for both document and subdocument types - assert_eq!(blocker.get_csp_directives(&Request::from_urls("https://example.com", "https://vidoza.co", "document").unwrap()), Some(String::from("worker-src 'none'"))); - assert_eq!(blocker.get_csp_directives(&Request::from_urls("https://example.com", "https://vidoza.net", "subdocument").unwrap()), Some(String::from("worker-src 'none'"))); + assert_eq!(blocker.get_csp_directives(&Request::new("https://example.com", "https://vidoza.co", "document").unwrap()), Some(String::from("worker-src 'none'"))); + assert_eq!(blocker.get_csp_directives(&Request::new("https://example.com", "https://vidoza.net", "subdocument").unwrap()), Some(String::from("worker-src 'none'"))); } { // Multiple merged directives should be returned if more than one match is present in the engine - let possible_results = vec![ + let possible_results = [ Some(String::from("script-src 'self' * 'unsafe-inline',worker-src 
'none'")), Some(String::from("worker-src 'none',script-src 'self' * 'unsafe-inline'")), ]; - assert!(possible_results.contains(&blocker.get_csp_directives(&Request::from_urls("https://example.com", "https://pirateproxy.live", "document").unwrap()))); - assert!(possible_results.contains(&blocker.get_csp_directives(&Request::from_urls("https://example.com", "https://pirateproxy.live", "subdocument").unwrap()))); + assert!(possible_results.contains(&blocker.get_csp_directives(&Request::new("https://example.com", "https://pirateproxy.live", "document").unwrap()))); + assert!(possible_results.contains(&blocker.get_csp_directives(&Request::new("https://example.com", "https://pirateproxy.live", "subdocument").unwrap()))); } { // A directive with an exception should not be returned - assert_eq!(blocker.get_csp_directives(&Request::from_urls("https://1337x.to", "https://1337x.to", "document").unwrap()), Some(String::from("script-src 'self' 'unsafe-inline'"))); - assert_eq!(blocker.get_csp_directives(&Request::from_urls("https://1337x.to/no-csp", "https://1337x.to", "subdocument").unwrap()), None); + assert_eq!(blocker.get_csp_directives(&Request::new("https://1337x.to", "https://1337x.to", "document").unwrap()), Some(String::from("script-src 'self' 'unsafe-inline'"))); + assert_eq!(blocker.get_csp_directives(&Request::new("https://1337x.to/no-csp", "https://1337x.to", "subdocument").unwrap()), None); } { // Multiple identical directives should only appear in the output once - assert_eq!(blocker.get_csp_directives(&Request::from_urls("https://example.com/duplicated-directive", "https://flashx.cc", "document").unwrap()), Some(String::from("worker-src 'none'"))); - assert_eq!(blocker.get_csp_directives(&Request::from_urls("https://example.com/duplicated-directive", "https://flashx.cc", "subdocument").unwrap()), Some(String::from("worker-src 'none'"))); + assert_eq!(blocker.get_csp_directives(&Request::new("https://example.com/duplicated-directive", "https://flashx.cc", 
"document").unwrap()), Some(String::from("worker-src 'none'"))); + assert_eq!(blocker.get_csp_directives(&Request::new("https://example.com/duplicated-directive", "https://flashx.cc", "subdocument").unwrap()), Some(String::from("worker-src 'none'"))); } { // A CSP exception with no corresponding directive should disable all CSP injections for the page - assert_eq!(blocker.get_csp_directives(&Request::from_urls("https://1337x.to/duplicated-directive/disable-all", "https://thepiratebay10.org", "document").unwrap()), None); - assert_eq!(blocker.get_csp_directives(&Request::from_urls("https://1337x.to/duplicated-directive/disable-all", "https://thepiratebay10.org", "document").unwrap()), None); + assert_eq!(blocker.get_csp_directives(&Request::new("https://1337x.to/duplicated-directive/disable-all", "https://thepiratebay10.org", "document").unwrap()), None); + assert_eq!(blocker.get_csp_directives(&Request::new("https://1337x.to/duplicated-directive/disable-all", "https://thepiratebay10.org", "document").unwrap()), None); } { // A CSP exception with a partyness modifier should only match where the modifier applies - assert_eq!(blocker.get_csp_directives(&Request::from_urls("htps://github.com/first-party-only", "https://example.com", "subdocument").unwrap()), None); - assert_eq!(blocker.get_csp_directives(&Request::from_urls("https://example.com/first-party-only", "https://example.com", "document").unwrap()), Some(String::from("script-src 'none'"))); + assert_eq!(blocker.get_csp_directives(&Request::new("htps://github.com/first-party-only", "https://example.com", "subdocument").unwrap()), None); + assert_eq!(blocker.get_csp_directives(&Request::new("https://example.com/first-party-only", "https://example.com", "document").unwrap()), Some(String::from("script-src 'none'"))); } } #[test] fn test_removeparam() { - let filters = vec![ - String::from("||example.com^$removeparam=test"), - String::from("*$removeparam=fbclid"), - String::from("/script.js$redirect-rule=noopjs"), 
- String::from("^block^$important"), - String::from("$removeparam=testCase,~image"), + let filters = [ + "||example.com^$removeparam=test", + "*$removeparam=fbclid", + "/script.js$redirect-rule=noopjs", + "^block^$important", + "$removeparam=testCase,~image", ]; let (network_filters, _) = parse_filters(&filters, true, Default::default()); @@ -1765,89 +1621,86 @@ mod blocker_tests { enable_optimizations: true, }; - let mut blocker = Blocker::new(network_filters, &blocker_options); - blocker.add_resource(&Resource { - name: "noopjs".into(), - aliases: vec![], - kind: crate::resources::ResourceType::Mime(crate::resources::MimeType::ApplicationJavascript), - content: base64::encode("(() => {})()"), - }).unwrap(); - - let result = blocker.check(&Request::from_urls("https://example.com?q=1&test=2#blue", "https://antonok.com", "script").unwrap()); + let blocker = Blocker::new(network_filters, &blocker_options); + let mut resources = ResourceStorage::default(); + + resources.add_resource(Resource::simple("noopjs", crate::resources::MimeType::ApplicationJavascript, "(() => {})()")).unwrap(); + + let result = blocker.check(&Request::new("https://example.com?q=1&test=2#blue", "https://antonok.com", "script").unwrap(), &resources); assert_eq!(result.rewritten_url, Some("https://example.com?q=1#blue".into())); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com?test=2&q=1#blue", "https://antonok.com", "script").unwrap()); + let result = blocker.check(&Request::new("https://example.com?test=2&q=1#blue", "https://antonok.com", "script").unwrap(), &resources); assert_eq!(result.rewritten_url, Some("https://example.com?q=1#blue".into())); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com?test=2#blue", "https://antonok.com", "script").unwrap()); + let result = blocker.check(&Request::new("https://example.com?test=2#blue", "https://antonok.com", "script").unwrap(), &resources); 
assert_eq!(result.rewritten_url, Some("https://example.com#blue".into())); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com?q=1#blue", "https://antonok.com", "script").unwrap()); + let result = blocker.check(&Request::new("https://example.com?q=1#blue", "https://antonok.com", "script").unwrap(), &resources); assert_eq!(result.rewritten_url, None); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com?q=1&test=2", "https://antonok.com", "script").unwrap()); + let result = blocker.check(&Request::new("https://example.com?q=1&test=2", "https://antonok.com", "script").unwrap(), &resources); assert_eq!(result.rewritten_url, Some("https://example.com?q=1".into())); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com?test=2&q=1", "https://antonok.com", "script").unwrap()); + let result = blocker.check(&Request::new("https://example.com?test=2&q=1", "https://antonok.com", "script").unwrap(), &resources); assert_eq!(result.rewritten_url, Some("https://example.com?q=1".into())); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com?test=2", "https://antonok.com", "script").unwrap()); + let result = blocker.check(&Request::new("https://example.com?test=2", "https://antonok.com", "script").unwrap(), &resources); assert_eq!(result.rewritten_url, Some("https://example.com".into())); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com?q=1", "https://antonok.com", "script").unwrap()); + let result = blocker.check(&Request::new("https://example.com?q=1", "https://antonok.com", "script").unwrap(), &resources); assert_eq!(result.rewritten_url, None); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com?q=fbclid", "https://antonok.com", "script").unwrap()); + let result = 
blocker.check(&Request::new("https://example.com?q=fbclid", "https://antonok.com", "script").unwrap(), &resources); assert_eq!(result.rewritten_url, None); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com?fbclid=10938&q=1&test=2", "https://antonok.com", "script").unwrap()); + let result = blocker.check(&Request::new("https://example.com?fbclid=10938&q=1&test=2", "https://antonok.com", "script").unwrap(), &resources); assert_eq!(result.rewritten_url, Some("https://example.com?q=1".into())); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://test.com?fbclid=10938&q=1&test=2", "https://antonok.com", "script").unwrap()); + let result = blocker.check(&Request::new("https://test.com?fbclid=10938&q=1&test=2", "https://antonok.com", "script").unwrap(), &resources); assert_eq!(result.rewritten_url, Some("https://test.com?q=1&test=2".into())); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com?q1=1&q2=2&q3=3&test=2&q4=4&q5=5&fbclid=39", "https://antonok.com", "script").unwrap()); + let result = blocker.check(&Request::new("https://example.com?q1=1&q2=2&q3=3&test=2&q4=4&q5=5&fbclid=39", "https://antonok.com", "script").unwrap(), &resources); assert_eq!(result.rewritten_url, Some("https://example.com?q1=1&q2=2&q3=3&q4=4&q5=5".into())); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com?q1=1&q1=2&test=2&test=3", "https://antonok.com", "script").unwrap()); + let result = blocker.check(&Request::new("https://example.com?q1=1&q1=2&test=2&test=3", "https://antonok.com", "script").unwrap(), &resources); assert_eq!(result.rewritten_url, Some("https://example.com?q1=1&q1=2".into())); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com/script.js?test=2#blue", "https://antonok.com", "script").unwrap()); + let result = 
blocker.check(&Request::new("https://example.com/script.js?test=2#blue", "https://antonok.com", "script").unwrap(), &resources); assert_eq!(result.rewritten_url, Some("https://example.com/script.js#blue".into())); assert_eq!(result.redirect, Some("data:application/javascript;base64,KCgpID0+IHt9KSgp".into())); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com/block/script.js?test=2", "https://antonok.com", "script").unwrap()); + let result = blocker.check(&Request::new("https://example.com/block/script.js?test=2", "https://antonok.com", "script").unwrap(), &resources); assert_eq!(result.rewritten_url, None); assert_eq!(result.redirect, Some("data:application/javascript;base64,KCgpID0+IHt9KSgp".into())); assert!(result.matched); - let result = blocker.check(&Request::from_urls("https://example.com/Path/?Test=ABC&testcase=AbC&testCase=aBc", "https://antonok.com", "script").unwrap()); + let result = blocker.check(&Request::new("https://example.com/Path/?Test=ABC&testcase=AbC&testCase=aBc", "https://antonok.com", "script").unwrap(), &resources); assert_eq!(result.rewritten_url, Some("https://example.com/Path/?Test=ABC&testcase=AbC".into())); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com?Test=ABC?123&test=3#&test=4#b", "https://antonok.com", "script").unwrap()); + let result = blocker.check(&Request::new("https://example.com?Test=ABC?123&test=3#&test=4#b", "https://antonok.com", "script").unwrap(), &resources); assert_eq!(result.rewritten_url, Some("https://example.com?Test=ABC?123#&test=4#b".into())); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com?Test=ABC&testCase=5", "https://antonok.com", "document").unwrap()); + let result = blocker.check(&Request::new("https://example.com?Test=ABC&testCase=5", "https://antonok.com", "document").unwrap(), &resources); assert_eq!(result.rewritten_url, 
Some("https://example.com?Test=ABC".into())); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com?Test=ABC&testCase=5", "https://antonok.com", "image").unwrap()); + let result = blocker.check(&Request::new("https://example.com?Test=ABC&testCase=5", "https://antonok.com", "image").unwrap(), &resources); assert_eq!(result.rewritten_url, None); assert!(!result.matched); } @@ -1855,7 +1708,7 @@ mod blocker_tests { /// Tests ported from the previous query parameter stripping logic in brave-core #[test] fn removeparam_brave_core_tests() { - let testcases = vec![ + let testcases = [ // (original url, expected url after filtering) ("https://example.com/?fbclid=1234", "https://example.com/"), ("https://example.com/?fbclid=1234&", "https://example.com/"), @@ -1914,9 +1767,10 @@ mod blocker_tests { }; let blocker = Blocker::new(network_filters, &blocker_options); + let resources = ResourceStorage::default(); for (original, expected) in testcases.into_iter() { - let result = blocker.check(&Request::from_urls(original, "https://example.net", "script").unwrap()); + let result = blocker.check(&Request::new(original, "https://example.net", "script").unwrap(), &resources); let expected = if original == expected { None } else { @@ -1928,9 +1782,9 @@ mod blocker_tests { #[test] fn test_removeparam_same_tokens() { - let filters = vec![ - String::from("$removeparam=example1_"), - String::from("$removeparam=example1-"), + let filters = [ + "$removeparam=example1_", + "$removeparam=example1-", ]; let (network_filters, _) = parse_filters(&filters, true, Default::default()); @@ -1941,20 +1795,20 @@ fn test_removeparam_same_tokens() { let blocker = Blocker::new(network_filters, &blocker_options); - let result = blocker.check(&Request::from_urls("https://example.com?example1_=1&example1-=2", "https://example.com", "script").unwrap()); + let result = blocker.check(&Request::new("https://example.com?example1_=1&example1-=2", "https://example.com", 
"script").unwrap(), &Default::default()); assert_eq!(result.rewritten_url, Some("https://example.com".into())); assert!(!result.matched); } #[test] fn test_redirect_priority() { - let filters = vec![ - String::from(".txt^$redirect-rule=a"), - String::from("||example.com^$redirect-rule=b:10"), - String::from("/text$redirect-rule=c:20"), - String::from("@@^excepta^$redirect-rule=a"), - String::from("@@^exceptb10^$redirect-rule=b:10"), - String::from("@@^exceptc20^$redirect-rule=c:20"), + let filters = [ + ".txt^$redirect-rule=a", + "||example.com^$redirect-rule=b:10", + "/text$redirect-rule=c:20", + "@@^excepta^$redirect-rule=a", + "@@^exceptb10^$redirect-rule=b:10", + "@@^exceptc20^$redirect-rule=c:20", ]; let (network_filters, _) = parse_filters(&filters, true, Default::default()); @@ -1963,73 +1817,75 @@ fn test_removeparam_same_tokens() { enable_optimizations: true, }; - let mut blocker = Blocker::new(network_filters, &blocker_options); - fn add_simple_resource(blocker: &mut Blocker, identifier: &str) -> Option { - let b64 = base64::encode(identifier); - blocker.add_resource(&Resource { - name: identifier.into(), - aliases: vec![], - kind: crate::resources::ResourceType::Mime(crate::resources::MimeType::TextPlain), - content: base64::encode(identifier), - }).unwrap(); - return Some(format!("data:text/plain;base64,{}", b64)); + let blocker = Blocker::new(network_filters, &blocker_options); + let mut resources = ResourceStorage::default(); + fn add_simple_resource(resources: &mut ResourceStorage, identifier: &str) -> Option { + resources.add_resource(Resource::simple(identifier, crate::resources::MimeType::TextPlain, identifier)).unwrap(); + Some(format!("data:text/plain;base64,{}", base64::encode(identifier))) } - let a_redirect = add_simple_resource(&mut blocker, "a"); - let b_redirect = add_simple_resource(&mut blocker, "b"); - let c_redirect = add_simple_resource(&mut blocker, "c"); + let a_redirect = add_simple_resource(&mut resources, "a"); + let b_redirect = 
add_simple_resource(&mut resources, "b"); + let c_redirect = add_simple_resource(&mut resources, "c"); - let result = blocker.check(&Request::from_urls("https://example.net/test", "https://example.com", "xmlhttprequest").unwrap()); + let result = blocker.check(&Request::new("https://example.net/test", "https://example.com", "xmlhttprequest").unwrap(), &resources); assert_eq!(result.redirect, None); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.net/test.txt", "https://example.com", "xmlhttprequest").unwrap()); + let result = blocker.check(&Request::new("https://example.net/test.txt", "https://example.com", "xmlhttprequest").unwrap(), &resources); assert_eq!(result.redirect, a_redirect); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com/test.txt", "https://example.com", "xmlhttprequest").unwrap()); + let result = blocker.check(&Request::new("https://example.com/test.txt", "https://example.com", "xmlhttprequest").unwrap(), &resources); assert_eq!(result.redirect, b_redirect); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com/text.txt", "https://example.com", "xmlhttprequest").unwrap()); + let result = blocker.check(&Request::new("https://example.com/text.txt", "https://example.com", "xmlhttprequest").unwrap(), &resources); assert_eq!(result.redirect, c_redirect); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com/exceptc20/text.txt", "https://example.com", "xmlhttprequest").unwrap()); + let result = blocker.check(&Request::new("https://example.com/exceptc20/text.txt", "https://example.com", "xmlhttprequest").unwrap(), &resources); assert_eq!(result.redirect, b_redirect); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com/exceptb10/text.txt", "https://example.com", "xmlhttprequest").unwrap()); + let result = 
blocker.check(&Request::new("https://example.com/exceptb10/text.txt", "https://example.com", "xmlhttprequest").unwrap(), &resources); assert_eq!(result.redirect, c_redirect); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com/exceptc20/exceptb10/text.txt", "https://example.com", "xmlhttprequest").unwrap()); + let result = blocker.check(&Request::new("https://example.com/exceptc20/exceptb10/text.txt", "https://example.com", "xmlhttprequest").unwrap(), &resources); assert_eq!(result.redirect, a_redirect); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com/exceptc20/exceptb10/excepta/text.txt", "https://example.com", "xmlhttprequest").unwrap()); + let result = blocker.check(&Request::new("https://example.com/exceptc20/exceptb10/excepta/text.txt", "https://example.com", "xmlhttprequest").unwrap(), &resources); assert_eq!(result.redirect, None); assert!(!result.matched); - let result = blocker.check(&Request::from_urls("https://example.com/exceptc20/exceptb10/text", "https://example.com", "xmlhttprequest").unwrap()); + let result = blocker.check(&Request::new("https://example.com/exceptc20/exceptb10/text", "https://example.com", "xmlhttprequest").unwrap(), &resources); assert_eq!(result.redirect, None); assert!(!result.matched); } #[test] fn tags_enable_works() { - let filters = vec![ - String::from("adv$tag=stuff"), - String::from("somelongpath/test$tag=stuff"), - String::from("||brianbondy.com/$tag=brian"), - String::from("||brave.com$tag=brian"), + let filters = [ + "adv$tag=stuff", + "somelongpath/test$tag=stuff", + "||brianbondy.com/$tag=brian", + "||brave.com$tag=brian", ]; - let url_results = vec![ - (Request::from_url("http://example.com/advert.html").unwrap(), true), - (Request::from_url("http://example.com/somelongpath/test/2.html").unwrap(), true), - (Request::from_url("https://brianbondy.com/about").unwrap(), false), - 
(Request::from_url("https://brave.com/about").unwrap(), false), + let url_results = [ + ("http://example.com/advert.html", true), + ("http://example.com/somelongpath/test/2.html", true), + ("https://brianbondy.com/about", false), + ("https://brave.com/about", false), ]; + let request_expectations: Vec<_> = url_results + .into_iter() + .map(|(url, expected_result)| { + let request = Request::new(url, "https://example.com", "other").unwrap(); + (request, expected_result) + }).collect(); + let (network_filters, _) = parse_filters(&filters, true, Default::default()); let blocker_options: BlockerOptions = BlockerOptions { @@ -2037,12 +1893,13 @@ fn test_removeparam_same_tokens() { }; let mut blocker = Blocker::new(network_filters, &blocker_options); + let resources = Default::default(); blocker.enable_tags(&["stuff"]); - assert_eq!(blocker.tags_enabled, HashSet::from_iter(vec![String::from("stuff")].into_iter())); + assert_eq!(blocker.tags_enabled, HashSet::from_iter([String::from("stuff")].into_iter())); assert_eq!(vec_hashmap_len(&blocker.filters_tagged.filter_map), 2); - url_results.into_iter().for_each(|(req, expected_result)| { - let matched_rule = blocker.check(&req); + request_expectations.into_iter().for_each(|(req, expected_result)| { + let matched_rule = blocker.check(&req, &resources); if expected_result { assert!(matched_rule.matched, "Expected match for {}", req.url); } else { @@ -2053,19 +1910,26 @@ fn test_removeparam_same_tokens() { #[test] fn tags_enable_adds_tags() { - let filters = vec![ - String::from("adv$tag=stuff"), - String::from("somelongpath/test$tag=stuff"), - String::from("||brianbondy.com/$tag=brian"), - String::from("||brave.com$tag=brian"), + let filters = [ + "adv$tag=stuff", + "somelongpath/test$tag=stuff", + "||brianbondy.com/$tag=brian", + "||brave.com$tag=brian", ]; - let url_results = vec![ - (Request::from_url("http://example.com/advert.html").unwrap(), true), - 
(Request::from_url("http://example.com/somelongpath/test/2.html").unwrap(), true), - (Request::from_url("https://brianbondy.com/about").unwrap(), true), - (Request::from_url("https://brave.com/about").unwrap(), true), + let url_results = [ + ("http://example.com/advert.html", true), + ("http://example.com/somelongpath/test/2.html", true), + ("https://brianbondy.com/about", true), + ("https://brave.com/about", true), ]; + let request_expectations: Vec<_> = url_results + .into_iter() + .map(|(url, expected_result)| { + let request = Request::new(url, "https://example.com", "other").unwrap(); + (request, expected_result) + }).collect(); + let (network_filters, _) = parse_filters(&filters, true, Default::default()); let blocker_options: BlockerOptions = BlockerOptions { @@ -2073,13 +1937,14 @@ fn test_removeparam_same_tokens() { }; let mut blocker = Blocker::new(network_filters, &blocker_options); + let resources = Default::default(); blocker.enable_tags(&["stuff"]); blocker.enable_tags(&["brian"]); - assert_eq!(blocker.tags_enabled, HashSet::from_iter(vec![String::from("brian"), String::from("stuff")].into_iter())); + assert_eq!(blocker.tags_enabled, HashSet::from_iter([String::from("brian"), String::from("stuff")].into_iter())); assert_eq!(vec_hashmap_len(&blocker.filters_tagged.filter_map), 4); - url_results.into_iter().for_each(|(req, expected_result)| { - let matched_rule = blocker.check(&req); + request_expectations.into_iter().for_each(|(req, expected_result)| { + let matched_rule = blocker.check(&req, &resources); if expected_result { assert!(matched_rule.matched, "Expected match for {}", req.url); } else { @@ -2090,19 +1955,26 @@ fn test_removeparam_same_tokens() { #[test] fn tags_disable_works() { - let filters = vec![ - String::from("adv$tag=stuff"), - String::from("somelongpath/test$tag=stuff"), - String::from("||brianbondy.com/$tag=brian"), - String::from("||brave.com$tag=brian"), + let filters = [ + "adv$tag=stuff", + "somelongpath/test$tag=stuff", + 
"||brianbondy.com/$tag=brian", + "||brave.com$tag=brian", ]; - let url_results = vec![ - (Request::from_url("http://example.com/advert.html").unwrap(), false), - (Request::from_url("http://example.com/somelongpath/test/2.html").unwrap(), false), - (Request::from_url("https://brianbondy.com/about").unwrap(), true), - (Request::from_url("https://brave.com/about").unwrap(), true), + let url_results = [ + ("http://example.com/advert.html", false), + ("http://example.com/somelongpath/test/2.html", false), + ("https://brianbondy.com/about", true), + ("https://brave.com/about", true), ]; + let request_expectations: Vec<_> = url_results + .into_iter() + .map(|(url, expected_result)| { + let request = Request::new(url, "https://example.com", "other").unwrap(); + (request, expected_result) + }).collect(); + let (network_filters, _) = parse_filters(&filters, true, Default::default()); let blocker_options: BlockerOptions = BlockerOptions { @@ -2110,15 +1982,16 @@ fn test_removeparam_same_tokens() { }; let mut blocker = Blocker::new(network_filters, &blocker_options); + let resources = Default::default(); blocker.enable_tags(&["brian", "stuff"]); - assert_eq!(blocker.tags_enabled, HashSet::from_iter(vec![String::from("brian"), String::from("stuff")].into_iter())); + assert_eq!(blocker.tags_enabled, HashSet::from_iter([String::from("brian"), String::from("stuff")].into_iter())); assert_eq!(vec_hashmap_len(&blocker.filters_tagged.filter_map), 4); blocker.disable_tags(&["stuff"]); - assert_eq!(blocker.tags_enabled, HashSet::from_iter(vec![String::from("brian")].into_iter())); + assert_eq!(blocker.tags_enabled, HashSet::from_iter([String::from("brian")].into_iter())); assert_eq!(vec_hashmap_len(&blocker.filters_tagged.filter_map), 2); - url_results.into_iter().for_each(|(req, expected_result)| { - let matched_rule = blocker.check(&req); + request_expectations.into_iter().for_each(|(req, expected_result)| { + let matched_rule = blocker.check(&req, &resources); if expected_result { 
assert!(matched_rule.matched, "Expected match for {}", req.url); } else { @@ -2182,6 +2055,7 @@ fn test_removeparam_same_tokens() { }; let mut blocker = Blocker::new(Vec::new(), &blocker_options); + let resources = Default::default(); blocker.enable_tags(&["brian"]); blocker.add_filter(NetworkFilter::parse("adv$tag=stuff", true, Default::default()).unwrap()).unwrap(); @@ -2189,15 +2063,22 @@ fn test_removeparam_same_tokens() { blocker.add_filter(NetworkFilter::parse("||brianbondy.com/$tag=brian", true, Default::default()).unwrap()).unwrap(); blocker.add_filter(NetworkFilter::parse("||brave.com$tag=brian", true, Default::default()).unwrap()).unwrap(); - let url_results = vec![ - (Request::from_url("http://example.com/advert.html").unwrap(), false), - (Request::from_url("http://example.com/somelongpath/test/2.html").unwrap(), false), - (Request::from_url("https://brianbondy.com/about").unwrap(), true), - (Request::from_url("https://brave.com/about").unwrap(), true), + let url_results = [ + ("http://example.com/advert.html", false), + ("http://example.com/somelongpath/test/2.html", false), + ("https://brianbondy.com/about", true), + ("https://brave.com/about", true), ]; - url_results.into_iter().for_each(|(req, expected_result)| { - let matched_rule = blocker.check(&req); + let request_expectations: Vec<_> = url_results + .into_iter() + .map(|(url, expected_result)| { + let request = Request::new(url, "https://example.com", "other").unwrap(); + (request, expected_result) + }).collect(); + + request_expectations.into_iter().for_each(|(req, expected_result)| { + let matched_rule = blocker.check(&req, &resources); if expected_result { assert!(matched_rule.matched, "Expected match for {}", req.url); } else { @@ -2213,12 +2094,13 @@ fn test_removeparam_same_tokens() { }; let mut blocker = Blocker::new(Vec::new(), &blocker_options); + let resources = Default::default(); blocker.add_filter(NetworkFilter::parse("@@*ad_banner.png", true, Default::default()).unwrap()).unwrap(); 
- let request = Request::from_url("http://example.com/ad_banner.png").unwrap(); + let request = Request::new("http://example.com/ad_banner.png", "https://example.com", "other").unwrap(); - let matched_rule = blocker.check_parameterised(&request, false, true); + let matched_rule = blocker.check_parameterised(&request, &resources, false, true); assert!(!matched_rule.matched); assert!(matched_rule.exception.is_some()); } @@ -2233,13 +2115,13 @@ fn test_removeparam_same_tokens() { blocker.add_filter(NetworkFilter::parse("@@||example.com$generichide", true, Default::default()).unwrap()).unwrap(); - assert!(blocker.check_generic_hide(&Request::from_url("https://example.com").unwrap())); + assert!(blocker.check_generic_hide(&Request::new("https://example.com", "https://example.com", "other").unwrap())); } } #[cfg(test)] mod legacy_rule_parsing_tests { - use crate::utils::rules_from_lists; + use crate::test_utils::rules_from_lists; use crate::lists::{parse_filters, FilterFormat, ParseOptions}; use crate::blocker::{Blocker, BlockerOptions}; use crate::blocker::vec_hashmap_len; @@ -2289,10 +2171,10 @@ mod legacy_rule_parsing_tests { const MALWARE_DOMAIN_LIST: ListCounts = ListCounts { filters: 1104, cosmetic_filters: 0, exceptions: 0, duplicates: 3 }; const MALWARE_DOMAINS: ListCounts = ListCounts { filters: 26853, cosmetic_filters: 0, exceptions: 0, duplicates: 48 }; - fn check_list_counts(rule_lists: &[String], format: FilterFormat, expectation: ListCounts) { + fn check_list_counts(rule_lists: impl IntoIterator>, format: FilterFormat, expectation: ListCounts) { let rules = rules_from_lists(rule_lists); - let (network_filters, cosmetic_filters) = parse_filters(&rules, true, ParseOptions { format, ..Default::default() }); + let (network_filters, cosmetic_filters) = parse_filters(rules, true, ParseOptions { format, ..Default::default() }); assert_eq!( (network_filters.len(), @@ -2323,58 +2205,58 @@ mod legacy_rule_parsing_tests { #[test] fn parse_easylist() { - 
check_list_counts(&vec![String::from("./data/test/easylist.txt")], FilterFormat::Standard, EASY_LIST); + check_list_counts(["./data/test/easylist.txt"], FilterFormat::Standard, EASY_LIST); } #[test] fn parse_easyprivacy() { - check_list_counts(&vec![String::from("./data/test/easyprivacy.txt")], FilterFormat::Standard, EASY_PRIVACY); + check_list_counts(["./data/test/easyprivacy.txt"], FilterFormat::Standard, EASY_PRIVACY); } #[test] fn parse_ublock_unbreak() { - check_list_counts(&vec![String::from("./data/test/ublock-unbreak.txt")], FilterFormat::Standard, UBLOCK_UNBREAK); + check_list_counts(["./data/test/ublock-unbreak.txt"], FilterFormat::Standard, UBLOCK_UNBREAK); } #[test] fn parse_brave_unbreak() { - check_list_counts(&vec![String::from("./data/test/brave-unbreak.txt")], FilterFormat::Standard, BRAVE_UNBREAK); + check_list_counts(["./data/test/brave-unbreak.txt"], FilterFormat::Standard, BRAVE_UNBREAK); } #[test] fn parse_brave_disconnect_simple_malware() { - check_list_counts(&vec![String::from("./data/test/disconnect-simple-malware.txt")], FilterFormat::Standard, DISCONNECT_SIMPLE_MALWARE); + check_list_counts(["./data/test/disconnect-simple-malware.txt"], FilterFormat::Standard, DISCONNECT_SIMPLE_MALWARE); } #[test] fn parse_spam404_main_blacklist() { - check_list_counts(&vec![String::from("./data/test/spam404-main-blacklist.txt")], FilterFormat::Standard, SPAM_404_MAIN_BLACKLIST); + check_list_counts(["./data/test/spam404-main-blacklist.txt"], FilterFormat::Standard, SPAM_404_MAIN_BLACKLIST); } #[test] fn parse_malware_domain_list() { - check_list_counts(&vec![String::from("./data/test/malwaredomainlist.txt")], FilterFormat::Hosts, MALWARE_DOMAIN_LIST); + check_list_counts(["./data/test/malwaredomainlist.txt"], FilterFormat::Hosts, MALWARE_DOMAIN_LIST); } #[test] fn parse_malware_domain_list_just_hosts() { - check_list_counts(&vec![String::from("./data/test/malwaredomainlist_justhosts.txt")], FilterFormat::Hosts, MALWARE_DOMAIN_LIST); + 
check_list_counts(["./data/test/malwaredomainlist_justhosts.txt"], FilterFormat::Hosts, MALWARE_DOMAIN_LIST); } #[test] fn parse_malware_domains() { - check_list_counts(&vec![String::from("./data/test/malwaredomains.txt")], FilterFormat::Hosts, MALWARE_DOMAINS); + check_list_counts(["./data/test/malwaredomains.txt"], FilterFormat::Hosts, MALWARE_DOMAINS); } #[test] fn parse_multilist() { let expectation = EASY_LIST + EASY_PRIVACY + UBLOCK_UNBREAK + BRAVE_UNBREAK; check_list_counts( - &vec![ - String::from("./data/test/easylist.txt"), - String::from("./data/test/easyprivacy.txt"), - String::from("./data/test/ublock-unbreak.txt"), - String::from("./data/test/brave-unbreak.txt"), + [ + "./data/test/easylist.txt", + "./data/test/easyprivacy.txt", + "./data/test/ublock-unbreak.txt", + "./data/test/brave-unbreak.txt", ], FilterFormat::Standard, expectation, @@ -2385,9 +2267,9 @@ mod legacy_rule_parsing_tests { fn parse_malware_multilist() { let expectation = SPAM_404_MAIN_BLACKLIST + DISCONNECT_SIMPLE_MALWARE; check_list_counts( - &vec![ - String::from("./data/test/spam404-main-blacklist.txt"), - String::from("./data/test/disconnect-simple-malware.txt"), + [ + "./data/test/spam404-main-blacklist.txt", + "./data/test/disconnect-simple-malware.txt", ], FilterFormat::Standard, expectation, @@ -2399,9 +2281,9 @@ mod legacy_rule_parsing_tests { let mut expectation = MALWARE_DOMAIN_LIST + MALWARE_DOMAINS; expectation.duplicates = 69; check_list_counts( - &vec![ - String::from("./data/test/malwaredomainlist.txt"), - String::from("./data/test/malwaredomains.txt"), + [ + "./data/test/malwaredomainlist.txt", + "./data/test/malwaredomains.txt", ], FilterFormat::Hosts, expectation, diff --git a/third_party/rust/adblock/v0_7/crate/src/content_blocking.rs b/third_party/rust/adblock/v0_8/crate/src/content_blocking.rs similarity index 96% rename from third_party/rust/adblock/v0_7/crate/src/content_blocking.rs rename to third_party/rust/adblock/v0_8/crate/src/content_blocking.rs index 
cff111fb6b3b..c324eaa8ee77 100644 --- a/third_party/rust/adblock/v0_7/crate/src/content_blocking.rs +++ b/third_party/rust/adblock/v0_8/crate/src/content_blocking.rs @@ -55,6 +55,7 @@ impl CbRule { } } +/// Corresponds to the `action` field of a Safari content blocking rule. #[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] pub struct CbAction { #[serde(rename = "type")] @@ -67,6 +68,7 @@ pub struct CbAction { pub selector: Option, } +/// Corresponds to the `action.type` field of a Safari content blocking rule. #[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] #[serde(rename_all = "kebab-case")] pub enum CbType { @@ -86,6 +88,8 @@ pub enum CbType { MakeHttps, } +/// Corresponds to possible entries in the `trigger.load_type` field of a Safari content blocking +/// rule. #[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] #[serde(rename_all = "kebab-case")] pub enum CbLoadType { @@ -93,6 +97,8 @@ pub enum CbLoadType { ThirdParty, } +/// Corresponds to possible entries in the `trigger.resource_type` field of a Safari content +/// blocking rule. #[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)] #[serde(rename_all = "kebab-case")] pub enum CbResourceType { @@ -107,6 +113,7 @@ pub enum CbResourceType { Popup, } +/// Corresponds to the `trigger` field of a Safari content blocking rule. #[derive(Clone, Debug, Default, PartialEq, Deserialize, Serialize)] #[serde(rename_all = "kebab-case")] pub struct CbTrigger { @@ -149,6 +156,8 @@ pub struct CbTrigger { pub unless_top_url: Option>, } +/// Possible failure reasons when attempting to convert an adblock rule into content filtering +/// syntax. #[derive(Debug)] pub enum CbRuleCreationFailure { /// Currently, only filter rules parsed in debug mode can be translated into equivalent content @@ -185,9 +194,9 @@ pub enum CbRuleCreationFailure { /// Cosmetic rules with entities (e.g. google.*) rather than hostnames cannot be represented in /// content blocking syntax. 
CosmeticEntitiesUnsupported, - /// Cosmetic rules with custom style specification (i.e. `:style(...)`) cannot be represented + /// Cosmetic rules with custom action specification (i.e. `:style(...)`) cannot be represented /// in content blocking syntax. - CosmeticStyleRulesNotSupported, + CosmeticActionRulesNotSupported, /// Cosmetic rules with scriptlet injections (i.e. `+js(...)`) cannot be represented in content /// blocking syntax. ScriptletInjectionsNotSupported, @@ -247,6 +256,7 @@ impl IntoIterator for CbRuleEquivalent { } } +/// Returned by [`CbRuleEquivalent`]'s `IntoIterator` implementation. pub struct CbRuleEquivalentIterator { rules: [Option; 2], index: usize, @@ -543,8 +553,8 @@ impl TryFrom for CbRule { fn try_from(v: CosmeticFilter) -> Result { use crate::filters::cosmetic::{CosmeticFilterLocationType, CosmeticFilterMask}; - if v.style.is_some() { - return Err(CbRuleCreationFailure::CosmeticStyleRulesNotSupported); + if v.action.is_some() { + return Err(CbRuleCreationFailure::CosmeticActionRulesNotSupported); } if v.mask.contains(CosmeticFilterMask::SCRIPT_INJECT) { return Err(CbRuleCreationFailure::ScriptletInjectionsNotSupported); @@ -1276,26 +1286,23 @@ mod ab2cb_tests { #[cfg(test)] mod filterset_tests { use crate::lists::{FilterSet, ParseOptions, RuleTypes}; - use once_cell::sync::Lazy; - - static FILTER_LIST: Lazy<[String; 6]> = Lazy::new(|| { - [ - String::from("||example.com^$script"), - String::from("||test.net^$image,third-party"), - String::from("/trackme.js^$script"), - String::from("example.com##.ad-banner"), - String::from("##.ad-640x480"), - String::from("##p.sponsored"), - ] - }); + + const FILTER_LIST: &[&str] = &[ + "||example.com^$script", + "||test.net^$image,third-party", + "/trackme.js^$script", + "example.com##.ad-banner", + "##.ad-640x480", + "##p.sponsored", + ]; #[test] fn convert_all_rules() -> Result<(), ()> { let mut set = FilterSet::new(true); - set.add_filters(&*FILTER_LIST, Default::default()); + 
set.add_filters(FILTER_LIST, Default::default()); let (cb_rules, used_rules) = set.into_content_blocking()?; - assert_eq!(used_rules, &*FILTER_LIST); + assert_eq!(used_rules, FILTER_LIST); // All 6 rules plus `ignore_previous_fp_documents()` assert_eq!(cb_rules.len(), 7); @@ -1311,7 +1318,7 @@ mod filterset_tests { }; let mut set = FilterSet::new(true); - set.add_filters(&*FILTER_LIST, parse_opts); + set.add_filters(FILTER_LIST, parse_opts); let (cb_rules, used_rules) = set.into_content_blocking()?; assert_eq!(used_rules, &FILTER_LIST[0..3]); @@ -1330,7 +1337,7 @@ mod filterset_tests { }; let mut set = FilterSet::new(true); - set.add_filters(&*FILTER_LIST, parse_opts); + set.add_filters(FILTER_LIST, parse_opts); let (cb_rules, used_rules) = set.into_content_blocking()?; assert_eq!(used_rules, &FILTER_LIST[3..6]); @@ -1344,15 +1351,15 @@ mod filterset_tests { #[test] fn ignore_unsupported_rules() -> Result<(), ()> { let mut set = FilterSet::new(true); - set.add_filters(&*FILTER_LIST, Default::default()); - set.add_filters(&[ + set.add_filters(FILTER_LIST, Default::default()); + set.add_filters([ // unicode characters - "||rgmechanics.info/uploads/660Ñ…90_".to_string(), - "||insaattrendy.com/Upload/bükerbanner*.jpg".to_string(), + "||rgmechanics.info/uploads/660Ñ…90_", + "||insaattrendy.com/Upload/bükerbanner*.jpg", ], Default::default()); let (cb_rules, used_rules) = set.into_content_blocking()?; - assert_eq!(used_rules, &*FILTER_LIST); + assert_eq!(used_rules, FILTER_LIST); // All 6 rules plus `ignore_previous_fp_documents()` assert_eq!(cb_rules.len(), 7); @@ -1363,7 +1370,7 @@ mod filterset_tests { #[test] fn punycode_if_domains() -> Result<(), ()> { let list = [ - "smskaraborg.se,örnsköldsviksgymnasium.se,mojligheternashusab.se##.env-modal-dialog__backdrop".to_string(), + "smskaraborg.se,örnsköldsviksgymnasium.se,mojligheternashusab.se##.env-modal-dialog__backdrop", ]; let mut set = FilterSet::new(true); set.add_filters(&list, Default::default()); @@ -1372,7 
+1379,8 @@ mod filterset_tests { assert_eq!(used_rules, list); assert_eq!(cb_rules.len(), 1); - assert_eq!(cb_rules[0].trigger.if_domain, Some(vec!["smskaraborg.se".to_string(), "xn--rnskldsviksgymnasium-29be.se".to_string(), "mojligheternashusab.se".to_string()])); + assert!(cb_rules[0].trigger.if_domain.is_some()); + assert_eq!(cb_rules[0].trigger.if_domain.as_ref().unwrap(), &["smskaraborg.se", "xn--rnskldsviksgymnasium-29be.se", "mojligheternashusab.se"]); Ok(()) } diff --git a/third_party/rust/adblock/v0_8/crate/src/cosmetic_filter_cache.rs b/third_party/rust/adblock/v0_8/crate/src/cosmetic_filter_cache.rs new file mode 100644 index 000000000000..a62f3cb2245f --- /dev/null +++ b/third_party/rust/adblock/v0_8/crate/src/cosmetic_filter_cache.rs @@ -0,0 +1,1057 @@ +//! Provides behavior related to cosmetic filtering - that is, modifying a page's contents after +//! it's been loaded into a browser. This is primarily used to hide or clean up unwanted page +//! elements that are served inline with the rest of the first-party content from a page, but can +//! also be used to inject JavaScript "scriptlets" that intercept and modify the behavior of +//! scripts on the page at runtime. +//! +//! The primary API exposed by this module is the `CosmeticFilterCache` struct, which stores +//! cosmetic filters and allows them to be queried efficiently at runtime for any which may be +//! relevant to a particular page. + +use crate::filters::cosmetic::CosmeticFilter; +use crate::filters::cosmetic::CosmeticFilterMask; +use crate::resources::{PermissionMask, ResourceStorage}; +use crate::utils::Hash; + +use std::collections::{HashMap, HashSet}; + +use serde::{Deserialize, Serialize}; + +/// Contains cosmetic filter information intended to be used on a particular URL. +#[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] +pub struct UrlSpecificResources { + /// `hide_selectors` is a set of any CSS selector on the page that should be hidden, i.e. 
+ /// styled as `{ display: none !important; }`. + pub hide_selectors: HashSet, + /// `style_selectors` is a map of CSS selectors on the page to respective non-hide style rules, + /// i.e. any required styles other than `display: none`. + pub style_selectors: HashMap>, + /// `remove_selectors` is a set of any CSS selector on the page that should be removed from the + /// DOM. + pub remove_selectors: HashSet, + /// `remove_attrs` is a map of CSS selectors on the page to respective HTML attributes that + /// should be removed from matching elements. + pub remove_attrs: HashMap>, + /// `remove_classes` is a map of CSS selectors on the page to respective CSS classes that should + /// be removed from matching elements. + pub remove_classes: HashMap>, + /// `exceptions` is a set of any class or id CSS selectors that should not have generic rules + /// applied. In practice, these should be passed to `hidden_class_id_selectors` and not used + /// otherwise. + pub exceptions: HashSet, + /// `injected_script` is the JavaScript code for any scriptlets that should be injected into + /// the page. + pub injected_script: String, + /// `generichide` is set to true if there is a corresponding `$generichide` exception network + /// filter. If so, the page should not query for additional generic rules using + /// `hidden_class_id_selectors`. + pub generichide: bool, +} + +impl UrlSpecificResources { + pub fn empty() -> Self { + Self { + hide_selectors: HashSet::new(), + style_selectors: HashMap::new(), + remove_selectors: HashSet::new(), + remove_attrs: HashMap::new(), + remove_classes: HashMap::new(), + exceptions: HashSet::new(), + injected_script: String::new(), + generichide: false, + } + } +} + +/// The main engine driving cosmetic filtering. +/// +/// There are two primary methods that should be considered when using this in a browser: +/// `hidden_class_id_selectors`, and `url_cosmetic_resources`.
+/// +/// Note that cosmetic filtering is imprecise and that this structure is intentionally designed for +/// efficient querying in the context of a browser, optimizing for low memory usage in the page +/// context and good performance. It is *not* designed to provide a 100% accurate report of what +/// will be blocked on any particular page, although when used correctly, all provided rules and +/// scriptlets should be safe to apply. +pub(crate) struct CosmeticFilterCache { + /// Rules that are just the CSS class of an element to be hidden on all sites, e.g. `##.ad`. + pub(crate) simple_class_rules: HashSet, + /// Rules that are just the CSS id of an element to be hidden on all sites, e.g. `###banner`. + pub(crate) simple_id_rules: HashSet, + /// Rules that are the CSS selector of an element to be hidden on all sites, starting with a + /// class, e.g. `##.ad image`. + pub(crate) complex_class_rules: HashMap>, + /// Rules that are the CSS selector of an element to be hidden on all sites, starting with an + /// id, e.g. `###banner > .text a`. + pub(crate) complex_id_rules: HashMap>, + + pub(crate) specific_rules: HostnameRuleDb, + + /// Rules that are the CSS selector of an element to be hidden on all sites that do not fit + /// into any of the class or id buckets above, e.g.
`##a[href="https://malware.com"]` + pub(crate) misc_generic_selectors: HashSet, +} + +impl CosmeticFilterCache { + pub fn new() -> Self { + Self { + simple_class_rules: HashSet::new(), + simple_id_rules: HashSet::new(), + complex_class_rules: HashMap::new(), + complex_id_rules: HashMap::new(), + + specific_rules: HostnameRuleDb::default(), + + misc_generic_selectors: HashSet::new(), + } + } + + pub fn from_rules(rules: Vec) -> Self { + let mut self_ = Self { + simple_class_rules: HashSet::with_capacity(rules.len() / 2), + simple_id_rules: HashSet::with_capacity(rules.len() / 2), + complex_class_rules: HashMap::with_capacity(rules.len() / 2), + complex_id_rules: HashMap::with_capacity(rules.len() / 2), + + specific_rules: HostnameRuleDb::default(), + + misc_generic_selectors: HashSet::with_capacity(rules.len() / 30), + }; + + for rule in rules { + self_.add_filter(rule) + } + + self_ + } + + pub fn add_filter(&mut self, rule: CosmeticFilter) { + if rule.has_hostname_constraint() { + if let Some(generic_rule) = rule.hidden_generic_rule() { + self.add_generic_filter(generic_rule); + } + self.specific_rules.store_rule(rule); + } else { + self.add_generic_filter(rule); + } + } + + /// Add a filter, assuming it has already been determined to be a generic rule + fn add_generic_filter(&mut self, rule: CosmeticFilter) { + if rule.mask.contains(CosmeticFilterMask::IS_CLASS_SELECTOR) { + if let Some(key) = &rule.key { + let key = key.clone(); + if rule.mask.contains(CosmeticFilterMask::IS_SIMPLE) { + self.simple_class_rules.insert(key); + } else { + if let Some(bucket) = self.complex_class_rules.get_mut(&key) { + bucket.push(rule.selector); + } else { + self.complex_class_rules.insert(key, vec![rule.selector]); + } + } + } + } else if rule.mask.contains(CosmeticFilterMask::IS_ID_SELECTOR) { + if let Some(key) = &rule.key { + let key = key.clone(); + if rule.mask.contains(CosmeticFilterMask::IS_SIMPLE) { + self.simple_id_rules.insert(key); + } else { + if let Some(bucket) = 
self.complex_id_rules.get_mut(&key) { + bucket.push(rule.selector); + } else { + self.complex_id_rules.insert(key, vec![rule.selector]); + } + } + } + } else { + self.misc_generic_selectors.insert(rule.selector); + } + } + + /// Generic class/id rules are by far the most common type of cosmetic filtering rule, and they + /// apply to all sites. Rather than injecting all of these rules onto every page, which would + /// blow up memory usage, we only inject rules based on classes and ids that actually appear on + /// the page (in practice, a `MutationObserver` is used to identify those elements). We can + /// include rules like `.a-class div#ads > .advertisement`, keyed by the `.a-class` selector, + /// since we know that this rule cannot possibly apply unless there is an `.a-class` element on + /// the page. + /// + /// This method returns all of the generic CSS selectors of elements to hide (i.e. with a + /// `display: none !important` CSS rule) that could possibly be or become relevant to the page + /// given the new classes and ids that have appeared on the page. It guarantees that it will be + /// safe to hide those elements on a particular page by taking into account the page's + /// hostname-specific set of exception rules. + /// + /// The exceptions should be returned directly as they appear in the page's + /// `UrlSpecificResources`. The exceptions, along with the set of already-seen classes and ids, + /// must be cached externally as the cosmetic filtering subsystem here is designed to be + /// stateless with regard to active page sessions. 
+ pub fn hidden_class_id_selectors( + &self, + classes: impl IntoIterator>, + ids: impl IntoIterator>, + exceptions: &HashSet, + ) -> Vec { + let mut selectors = vec![]; + + classes.into_iter().for_each(|class| { + let class = class.as_ref(); + if self.simple_class_rules.contains(class) + && !exceptions.contains(&format!(".{}", class)) + { + selectors.push(format!(".{}", class)); + } + if let Some(bucket) = self.complex_class_rules.get(class) { + selectors.extend(bucket.iter().filter(|sel| !exceptions.contains(*sel)).map(|s| s.to_owned())); + } + }); + ids.into_iter().for_each(|id| { + let id = id.as_ref(); + if self.simple_id_rules.contains(id) && !exceptions.contains(&format!("#{}", id)) { + selectors.push(format!("#{}", id)); + } + if let Some(bucket) = self.complex_id_rules.get(id) { + selectors.extend(bucket.iter().filter(|sel| !exceptions.contains(*sel)).map(|s| s.to_owned())); + } + }); + + selectors + } + + /// Any rules that can't be handled by `hidden_class_id_selectors` are returned by + /// `hostname_cosmetic_resources`. As soon as a page navigation is committed, this method + /// should be queried to get the initial set of cosmetic filtering operations to apply to the + /// page. This provides any rules specifying elements to hide by selectors that are too complex + /// to be returned by `hidden_class_id_selectors` (i.e. not directly starting with a class or + /// id selector, like `div[class*="Ads"]`), or any rule that is only applicable to a particular + /// hostname or set of hostnames (like `example.com##.a-class`). The first category is always + /// injected into every page, and makes up a relatively small number of rules in practice. 
+ pub fn hostname_cosmetic_resources( + &self, + resources: &ResourceStorage, + hostname: &str, + generichide: bool, + ) -> UrlSpecificResources { + let domain_str = { + let (start, end) = crate::url_parser::get_host_domain(hostname); + &hostname[start..end] + }; + + let (request_entities, request_hostnames) = hostname_domain_hashes(hostname, domain_str); + + let mut specific_hide_selectors = HashSet::new(); + let mut style_selectors = HashMap::<_, Vec<_>>::new(); + let mut remove_selectors = HashSet::new(); + let mut remove_attrs = HashMap::<_, Vec<_>>::new(); + let mut remove_classes = HashMap::<_, Vec<_>>::new(); + let mut script_injections = HashMap::<&str, PermissionMask>::new(); + let mut exceptions = HashSet::new(); + + let hashes: Vec<&Hash> = request_entities.iter().chain(request_hostnames.iter()).collect(); + + fn populate_set(hash: &Hash, source_bin: &HostnameFilterBin, dest_set: &mut HashSet) { + if let Some(s) = source_bin.get(hash) { + s.iter().for_each(|s| { dest_set.insert(s.to_owned()); }); + } + } + fn populate_map(hash: &Hash, source_bin: &HostnameFilterBin<(String, String)>, dest_map: &mut HashMap>) { + if let Some(s) = source_bin.get(hash) { + s.iter().for_each(|s| { + dest_map.entry(s.0.to_owned()).and_modify(|v| v.push(s.1.to_owned())).or_insert_with(|| vec![s.1.to_owned()]); + }); + } + } + for hash in hashes.iter() { + populate_set(hash, &self.specific_rules.hide, &mut specific_hide_selectors); + populate_set(hash, &self.specific_rules.remove, &mut remove_selectors); + // special behavior: `script_injections` doesn't have to own the strings yet, since the + // scripts need to be fetched and templated later + if let Some(s) = self.specific_rules.inject_script.get(hash) { + s.iter().for_each(|(s, mask)| { + script_injections.entry(s).and_modify(|entry| *entry |= *mask).or_insert(*mask); + }); + } + + populate_map(hash, &self.specific_rules.style, &mut style_selectors); + populate_map(hash, &self.specific_rules.remove_attr, &mut remove_attrs); 
+ populate_map(hash, &self.specific_rules.remove_class, &mut remove_classes); + } + + fn prune_set(hash: &Hash, source_bin: &HostnameFilterBin, dest_set: &mut HashSet) { + if let Some(s) = source_bin.get(hash) { + s.iter().for_each(|s| { + dest_set.remove(s); + }); + } + } + fn prune_map(hash: &Hash, source_bin: &HostnameFilterBin<(String, String)>, dest_map: &mut HashMap>) { + if let Some(s) = source_bin.get(hash) { + s.iter().for_each(|s| { + if let Some(v) = dest_map.get_mut(&s.0) { + v.retain(|e| e != &s.1); + if v.is_empty() { + dest_map.remove(&s.0); + } + } + }); + } + } + for hash in hashes.iter() { + // special behavior: unhide rules need to go in `exceptions` as well + if let Some(s) = self.specific_rules.unhide.get(hash) { + s.iter().for_each(|s| { + specific_hide_selectors.remove(s); + exceptions.insert(s.to_owned()); + }); + } + prune_set(hash, &self.specific_rules.unremove, &mut remove_selectors); + // same logic but not using prune_set since strings are unowned, (see above) + if let Some(s) = self.specific_rules.uninject_script.get(hash) { + s.iter().for_each(|s| { + script_injections.remove(s.as_str()); + }); + } + + prune_map(hash, &self.specific_rules.unstyle, &mut style_selectors); + prune_map(hash, &self.specific_rules.unremove_attr, &mut remove_attrs); + prune_map(hash, &self.specific_rules.unremove_class, &mut remove_classes); + } + + let hide_selectors = if generichide { + specific_hide_selectors + } else { + let mut hide_selectors = self + .misc_generic_selectors + .difference(&exceptions) + .cloned() + .collect::>(); + specific_hide_selectors.into_iter().for_each(|sel| { + hide_selectors.insert(sel); + }); + hide_selectors + }; + + let mut injected_script = String::new(); + script_injections.iter().for_each(|(s, mask)| { + if let Ok(filled_template) = resources.get_scriptlet_resource(s, *mask) { + injected_script += "try {\n"; + injected_script += &filled_template; + injected_script += "\n} catch ( e ) { }\n"; + } + }); + + 
UrlSpecificResources { + hide_selectors, + style_selectors, + remove_selectors, + remove_attrs, + remove_classes, + exceptions, + injected_script, + generichide, + } + } +} + +/// Each hostname-specific filter can be pointed to by several different hostnames, and each +/// hostname can correspond to several different filters. To effectively store and access those +/// filters by hostname, all the non-hostname information for filters is stored in per-hostname +/// "buckets" within a Vec, and each bucket is identified by its index. Hostname hashes are used as +/// keys to get the indices of relevant buckets, which are in turn used to retrieve all the filters +/// that apply. +#[derive(Default)] +pub(crate) struct HostnameFilterBin(pub HashMap>); + +impl HostnameFilterBin { + pub fn insert(&mut self, token: &Hash, filter: T) { + if let Some(bucket) = self.0.get_mut(token) { + bucket.push(filter); + } else { + self.0.insert(*token, vec![filter]); + } + } + + fn get(&self, token: &Hash) -> Option<&Vec> { + self.0.get(token) + } +} + +/// Holds filter bins categorized by filter type. +#[derive(Default)] +pub(crate) struct HostnameRuleDb { + /// Simple hostname-specific hide rules, e.g. `example.com##.ad`. + /// + /// The parameter is the rule's CSS selector. + pub hide: HostnameFilterBin, + /// Simple hostname-specific hide exception rules, e.g. `example.com#@#.ad`. + /// + /// The parameter is the rule's CSS selector. + pub unhide: HostnameFilterBin, + /// Hostname-specific rules with a scriptlet to inject along with any arguments, e.g. + /// `example.com##+js(acis, Number.isNan)`. + /// + /// The parameter is the contents of the `+js(...)` syntax construct. + pub inject_script: HostnameFilterBin<(String, PermissionMask)>, + /// Hostname-specific rules to except a scriptlet to inject along with any arguments, e.g. + /// `example.com#@#+js(acis, Number.isNan)`. + /// + /// The parameter is the contents of the `+js(...)` syntax construct. 
+ /// + /// In practice, these rules are extremely rare in filter lists. + pub uninject_script: HostnameFilterBin, + /// Simple hostname-specific rules with a remove action, e.g. `example.com##.ad:remove()`. + /// + /// The parameter is the rule's CSS selector. + pub remove: HostnameFilterBin, + /// Simple hostname-specific remove action exception rules, e.g. `example.com#@#.ad:remove()`. + /// + /// The parameter is the rule's CSS selector. + pub unremove: HostnameFilterBin, + /// Hostname-specific rules with a custom style for an element, e.g. + /// `example.com##.ad:style(margin: 0)`. + /// + /// The parameters are the rule's selector and its additional style. + pub style: HostnameFilterBin<(String, String)>, + /// Hostname-specific exception rules for a custom style for an element, e.g. + /// `example.com#@#.ad:style(margin: 0)`. + /// + /// The parameters are the rule's selector and its additional style. + /// + /// In practice, this kind of rule does not appear in filter lists, although it is not + /// explicitly forbidden according to any syntax documentation. + pub unstyle: HostnameFilterBin<(String, String)>, + /// Simple hostname-specific rules with a remove attribute action, e.g. `example.com##.ad:remove-attr(style)`. + /// + /// The parameters are the rule's CSS selector and the attribute to remove. + pub remove_attr: HostnameFilterBin<(String, String)>, + /// Simple hostname-specific remove attribute action exception rules, e.g. `example.com#@#.ad:remove-attr(style)`. + /// + /// The parameters are the rule's CSS selector and the attribute to remove. + pub unremove_attr: HostnameFilterBin<(String, String)>, + /// Simple hostname-specific rules with a remove class action, e.g. `example.com##.ad:remove-class(overlay)`. + /// + /// The parameters are the rule's CSS selector and the class to remove. + pub remove_class: HostnameFilterBin<(String, String)>, + /// Simple hostname-specific remove class action exception rules, e.g. `example.com#@#.ad:remove-class(overlay)`.
+ /// + /// The parameters are the rule's CSS selector and the class to remove. + pub unremove_class: HostnameFilterBin<(String, String)>, +} + +impl HostnameRuleDb { + pub fn store_rule(&mut self, rule: CosmeticFilter) { + use crate::filters::cosmetic::CosmeticFilterAction; + use SpecificFilterType::*; + + let unhide = rule.mask.contains(CosmeticFilterMask::UNHIDE); + let script_inject = rule.mask.contains(CosmeticFilterMask::SCRIPT_INJECT); + let selector = rule.selector; + + let kind = match (unhide, script_inject, rule.action) { + (false, false, None) => Hide(selector), + (true, false, None) => Unhide(selector), + (false, true, None) => InjectScript((selector, rule.permission)), + (true, true, None) => UninjectScript((selector, rule.permission)), + (false, false, Some(CosmeticFilterAction::Style(s))) => Style((selector, s)), + (true, false, Some(CosmeticFilterAction::Style(s)) )=> Unstyle((selector, s)), + (false, false, Some(CosmeticFilterAction::Remove)) => Remove(selector), + (true, false, Some(CosmeticFilterAction::Remove)) => Unremove(selector), + (false, false, Some(CosmeticFilterAction::RemoveClass(c))) => RemoveClass((selector, c)), + (true, false, Some(CosmeticFilterAction::RemoveClass(c))) => UnremoveClass((selector, c)), + (false, false, Some(CosmeticFilterAction::RemoveAttr(a))) => RemoveAttr((selector, a)), + (true, false, Some(CosmeticFilterAction::RemoveAttr(a))) => UnremoveAttr((selector, a)), + (_, true, Some(_)) => return, // shouldn't be possible + }; + + let tokens_to_insert = std::iter::empty() + .chain(rule.hostnames.unwrap_or(Vec::new())) + .chain(rule.entities.unwrap_or(Vec::new())); + + tokens_to_insert.for_each(|t| self.store(&t, kind.clone())); + + let tokens_to_insert_negated = std::iter::empty() + .chain(rule.not_hostnames.unwrap_or(Vec::new())) + .chain(rule.not_entities.unwrap_or(Vec::new())); + + let negated = kind.negated(); + + tokens_to_insert_negated.for_each(|t| self.store(&t, negated.clone())); + } + + fn store(&mut self, 
token: &Hash, kind: SpecificFilterType) { + use SpecificFilterType::*; + + match kind { + Hide(s) => self.hide.insert(token, s), + Unhide(s) => self.unhide.insert(token, s), + InjectScript(s) => self.inject_script.insert(token, s), + UninjectScript((s, _)) => self.uninject_script.insert(token, s), + Remove(s) => self.remove.insert(token, s), + Unremove(s) => self.unremove.insert(token, s), + Style(s) => self.style.insert(token, s), + Unstyle(s) => self.unstyle.insert(token, s), + RemoveAttr(s) => self.remove_attr.insert(token, s), + UnremoveAttr(s) => self.unremove_attr.insert(token, s), + RemoveClass(s) => self.remove_class.insert(token, s), + UnremoveClass(s) => self.unremove_class.insert(token, s), + } + } +} + +/// Exists to use common logic for binning filters correctly +#[derive(Clone)] +enum SpecificFilterType { + Hide(String), + Unhide(String), + InjectScript((String, PermissionMask)), + UninjectScript((String, PermissionMask)), + Remove(String), + Unremove(String), + Style((String, String)), + Unstyle((String, String)), + RemoveAttr((String, String)), + UnremoveAttr((String, String)), + RemoveClass((String, String)), + UnremoveClass((String, String)), +} + +impl SpecificFilterType { + fn negated(self) -> Self { + match self { + Self::Hide(s) => Self::Unhide(s), + Self::Unhide(s) => Self::Hide(s), + Self::InjectScript(s) => Self::UninjectScript(s), + Self::UninjectScript(s) => Self::InjectScript(s), + Self::Remove(s) => Self::Unremove(s), + Self::Unremove(s) => Self::Remove(s), + Self::Style(s) => Self::Unstyle(s), + Self::Unstyle(s) => Self::Style(s), + Self::RemoveAttr(s) => Self::UnremoveAttr(s), + Self::UnremoveAttr(s) => Self::RemoveAttr(s), + Self::RemoveClass(s) => Self::UnremoveClass(s), + Self::UnremoveClass(s) => Self::RemoveClass(s), + } + } +} + +fn hostname_domain_hashes(hostname: &str, domain: &str) -> (Vec, Vec) { + let request_entities = + crate::filters::cosmetic::get_entity_hashes_from_labels(hostname, domain); + let request_hostnames = + 
crate::filters::cosmetic::get_hostname_hashes_from_labels(hostname, domain); + + (request_entities, request_hostnames) +} + +#[cfg(test)] +mod cosmetic_cache_tests { + use super::*; + use crate::resources::Resource; + + fn cache_from_rules(rules: Vec<&str>) -> CosmeticFilterCache { + let parsed_rules = rules + .iter() + .map(|r| CosmeticFilter::parse(r, false, Default::default()).unwrap()) + .collect::>(); + + CosmeticFilterCache::from_rules(parsed_rules) + } + + #[test] + fn exceptions() { + let cfcache = cache_from_rules(vec!["~example.com##.item", "sub.example.com#@#.item2"]); + let resources = ResourceStorage::default(); + + let out = cfcache.hostname_cosmetic_resources(&resources, "test.com", false); + let mut expected = UrlSpecificResources::empty(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "example.com", false); + expected.exceptions.insert(".item".into()); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); + expected.exceptions.insert(".item2".into()); + assert_eq!(out, expected); + } + + #[test] + fn exceptions2() { + let cfcache = cache_from_rules(vec!["example.com,~sub.example.com##.item"]); + let resources = ResourceStorage::default(); + + let out = cfcache.hostname_cosmetic_resources(&resources, "test.com", false); + let mut expected = UrlSpecificResources::empty(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "example.com", false); + expected.hide_selectors.insert(".item".to_owned()); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); + let mut expected = UrlSpecificResources::empty(); + expected.exceptions.insert(".item".into()); + assert_eq!(out, expected); + } + + #[test] + fn style_exceptions() { + let cfcache = cache_from_rules(vec![ + "example.com,~sub.example.com##.element:style(background: #fff)", + 
"sub.test.example.com#@#.element:style(background: #fff)", + "a1.sub.example.com##.element", + "a2.sub.example.com##.element:style(background: #000)", + "a3.example.com##.element:style(background: #000)", + ]); + let resources = ResourceStorage::default(); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); + let mut expected = UrlSpecificResources::empty(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.test.example.com", false); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a1.sub.example.com", false); + expected.hide_selectors.insert(".element".to_owned()); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "test.example.com", false); + expected.hide_selectors.clear(); + expected + .style_selectors + .insert(".element".to_owned(), vec!["background: #fff".to_owned()]); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a2.sub.example.com", false); + expected.style_selectors.clear(); + expected + .style_selectors + .insert(".element".to_owned(), vec!["background: #000".to_owned()]); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a3.example.com", false); + expected.style_selectors.clear(); + expected + .style_selectors + .insert(".element".to_owned(), vec!["background: #000".to_owned(), "background: #fff".to_owned()]); + // order is non-deterministic + if out != expected { + expected + .style_selectors + .get_mut(".element") + .unwrap() + .reverse(); + assert_eq!(out, expected); + } + } + + #[test] + fn script_exceptions() { + use crate::resources::{MimeType, ResourceType}; + + let cfcache = cache_from_rules(vec![ + "example.com,~sub.example.com##+js(set-constant.js, atob, trueFunc)", + "sub.test.example.com#@#+js(set-constant.js, atob, trueFunc)", + "cosmetic.net##+js(nowebrtc.js)", + 
"g.cosmetic.net##+js(window.open-defuser.js)", + "c.g.cosmetic.net#@#+js(nowebrtc.js)", + ]); + let resources = ResourceStorage::from_resources([ + Resource { + name: "set-constant.js".into(), + aliases: vec![], + kind: ResourceType::Template, + content: base64::encode("set-constant.js, {{1}}, {{2}}"), + dependencies: vec![], + permission: Default::default(), + }, + Resource::simple("nowebrtc.js", MimeType::ApplicationJavascript, "nowebrtc.js"), + Resource::simple("window.open-defuser.js", MimeType::ApplicationJavascript, "window.open-defuser.js"), + ]); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); + let mut expected = UrlSpecificResources::empty(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.test.example.com", false); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "test.example.com", false); + expected.injected_script = + "try {\nset-constant.js, atob, trueFunc\n} catch ( e ) { }\n".to_owned(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "cosmetic.net", false); + expected.injected_script = "try {\nnowebrtc.js\n} catch ( e ) { }\n".to_owned(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "g.cosmetic.net", false); + expected.injected_script = "try {\nnowebrtc.js\n} catch ( e ) { }\ntry {\nwindow.open-defuser.js\n} catch ( e ) { }\n".to_owned(); + // order is non-deterministic + if out != expected { + expected.injected_script = "try {\nwindow.open-defuser.js\n} catch ( e ) { }\ntry {\nnowebrtc.js\n} catch ( e ) { }\n".to_owned(); + assert_eq!(out, expected); + } + + let out = cfcache.hostname_cosmetic_resources(&resources, "c.g.cosmetic.net", false); + expected.injected_script = "try {\nwindow.open-defuser.js\n} catch ( e ) { }\n".to_owned(); + assert_eq!(out, expected); + } + + #[test] + fn remove_exceptions() { + let cfcache = 
cache_from_rules(vec![ + "example.com,~sub.example.com##.element:remove()", + "sub.test.example.com#@#.element:remove()", + "a1.sub.example.com##.element", + "a2.sub.example.com##.element:remove()", + "a3.example.com##.element:remove()", + ]); + let resources = ResourceStorage::default(); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); + let mut expected = UrlSpecificResources::empty(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.test.example.com", false); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a1.sub.example.com", false); + expected.hide_selectors.insert(".element".to_owned()); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "test.example.com", false); + expected.hide_selectors.clear(); + expected.remove_selectors.insert(".element".to_owned()); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a2.sub.example.com", false); + expected.remove_selectors.clear(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a3.example.com", false); + expected.remove_selectors.clear(); + expected.remove_selectors.insert(".element".to_owned()); + assert_eq!(out, expected); + } + + #[test] + fn remove_attr_exceptions() { + let cfcache = cache_from_rules(vec![ + "example.com,~sub.example.com##.element:remove-attr(style)", + "sub.test.example.com#@#.element:remove-attr(style)", + "a1.sub.example.com##.element", + "a2.sub.example.com##.element:remove-attr(src)", + "a3.example.com##.element:remove-attr(src)", + ]); + let resources = ResourceStorage::default(); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); + let mut expected = UrlSpecificResources::empty(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.test.example.com", false); 
+ assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a1.sub.example.com", false); + expected.hide_selectors.insert(".element".to_owned()); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "test.example.com", false); + expected.hide_selectors.clear(); + expected + .remove_attrs + .insert(".element".to_owned(), vec!["style".to_owned()]); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a2.sub.example.com", false); + expected.remove_attrs.clear(); + expected + .remove_attrs + .insert(".element".to_owned(), vec!["src".to_owned()]); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a3.example.com", false); + expected.remove_attrs.clear(); + expected + .remove_attrs + .insert(".element".to_owned(), vec!["src".to_owned(), "style".to_owned()]); + // order is non-deterministic + if out != expected { + expected + .remove_attrs + .get_mut(".element") + .unwrap() + .reverse(); + assert_eq!(out, expected); + } + } + + #[test] + fn remove_class_exceptions() { + let cfcache = cache_from_rules(vec![ + "example.com,~sub.example.com##.element:remove-class(overlay)", + "sub.test.example.com#@#.element:remove-class(overlay)", + "a1.sub.example.com##.element", + "a2.sub.example.com##.element:remove-class(banner)", + "a3.example.com##.element:remove-class(banner)", + ]); + let resources = ResourceStorage::default(); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); + let mut expected = UrlSpecificResources::empty(); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "sub.test.example.com", false); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a1.sub.example.com", false); + expected.hide_selectors.insert(".element".to_owned()); + assert_eq!(out, expected); + + let out = 
cfcache.hostname_cosmetic_resources(&resources, "test.example.com", false); + expected.hide_selectors.clear(); + expected + .remove_classes + .insert(".element".to_owned(), vec!["overlay".to_owned()]); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a2.sub.example.com", false); + expected.remove_classes.clear(); + expected + .remove_classes + .insert(".element".to_owned(), vec!["banner".to_owned()]); + assert_eq!(out, expected); + + let out = cfcache.hostname_cosmetic_resources(&resources, "a3.example.com", false); + expected.remove_classes.clear(); + expected + .remove_classes + .insert(".element".to_owned(), vec!["banner".to_owned(), "overlay".to_owned()]); + // order is non-deterministic + if out != expected { + expected + .remove_classes + .get_mut(".element") + .unwrap() + .reverse(); + assert_eq!(out, expected); + } + } + + /// Avoid impossible type inference for type parameter `impl AsRef` + const EMPTY: &[&str] = &[]; + + #[test] + fn matching_hidden_class_id_selectors() { + let rules = [ + "##.a-class", + "###simple-id", + "##.a-class .with .children", + "##.children .including #simple-id", + "##a.a-class", + ]; + let cfcache = CosmeticFilterCache::from_rules( + rules + .iter() + .map(|r| CosmeticFilter::parse(r, false, Default::default()).unwrap()) + .collect::>(), + ); + + let out = cfcache.hidden_class_id_selectors(["with"], EMPTY, &HashSet::default()); + assert_eq!(out, Vec::::new()); + + let out = cfcache.hidden_class_id_selectors(EMPTY, ["with"], &HashSet::default()); + assert_eq!(out, Vec::::new()); + + let out = cfcache.hidden_class_id_selectors(EMPTY, ["a-class"], &HashSet::default()); + assert_eq!(out, Vec::::new()); + + let out = + cfcache.hidden_class_id_selectors(["simple-id"], EMPTY, &HashSet::default()); + assert_eq!(out, Vec::::new()); + + let out = cfcache.hidden_class_id_selectors(["a-class"], EMPTY, &HashSet::default()); + assert_eq!(out, [".a-class", ".a-class .with .children"]); + + let out = 
cfcache.hidden_class_id_selectors( + ["children", "a-class"], + EMPTY, + &HashSet::default(), + ); + assert_eq!( + out, + [ + ".children .including #simple-id", + ".a-class", + ".a-class .with .children", + ] + ); + + let out = + cfcache.hidden_class_id_selectors(EMPTY, ["simple-id"], &HashSet::default()); + assert_eq!(out, ["#simple-id"]); + + let out = cfcache.hidden_class_id_selectors( + ["children", "a-class"], + ["simple-id"], + &HashSet::default(), + ); + assert_eq!( + out, + [ + ".children .including #simple-id", + ".a-class", + ".a-class .with .children", + "#simple-id", + ] + ); + } + + #[test] + fn class_id_exceptions() { + let rules = vec![ + "##.a-class", + "###simple-id", + "##.a-class .with .children", + "##.children .including #simple-id", + "##a.a-class", + "example.*#@#.a-class", + "~test.com###test-element", + ]; + let cfcache = CosmeticFilterCache::from_rules( + rules + .iter() + .map(|r| CosmeticFilter::parse(r, false, Default::default()).unwrap()) + .collect::>(), + ); + let resources = ResourceStorage::default(); + let exceptions = cfcache + .hostname_cosmetic_resources(&resources, "example.co.uk", false) + .exceptions; + + let out = cfcache.hidden_class_id_selectors(["a-class"], EMPTY, &exceptions); + assert_eq!(out, [".a-class .with .children"]); + + let out = cfcache.hidden_class_id_selectors( + ["children", "a-class"], + ["simple-id"], + &exceptions, + ); + assert_eq!( + out, + [ + ".children .including #simple-id", + ".a-class .with .children", + "#simple-id", + ] + ); + + let out = cfcache.hidden_class_id_selectors(EMPTY, ["test-element"], &exceptions); + assert_eq!(out, ["#test-element"]); + + let exceptions = cfcache + .hostname_cosmetic_resources(&resources, "a1.test.com", false) + .exceptions; + + let out = cfcache.hidden_class_id_selectors(["a-class"], EMPTY, &exceptions); + assert_eq!(out, [".a-class", ".a-class .with .children"]); + + let out = cfcache.hidden_class_id_selectors( + ["children", "a-class"], + ["simple-id"], + 
&exceptions, + ); + assert_eq!( + out, + [ + ".children .including #simple-id", + ".a-class", + ".a-class .with .children", + "#simple-id", + ] + ); + + let out = cfcache.hidden_class_id_selectors(EMPTY, ["test-element"], &exceptions); + assert_eq!(out, Vec::::new()); + } + + #[test] + fn misc_generic_exceptions() { + let rules = vec![ + "##a[href=\"bad.com\"]", + "##div > p", + "##a[href=\"notbad.com\"]", + "example.com#@#div > p", + "~example.com##a[href=\"notbad.com\"]", + ]; + let cfcache = CosmeticFilterCache::from_rules( + rules + .iter() + .map(|r| CosmeticFilter::parse(r, false, Default::default()).unwrap()) + .collect::>(), + ); + let resources = ResourceStorage::default(); + + let hide_selectors = cfcache + .hostname_cosmetic_resources(&resources, "test.com", false) + .hide_selectors; + let mut expected_hides = HashSet::new(); + expected_hides.insert("a[href=\"bad.com\"]".to_owned()); + expected_hides.insert("div > p".to_owned()); + expected_hides.insert("a[href=\"notbad.com\"]".to_owned()); + assert_eq!(hide_selectors, expected_hides); + + let hide_selectors = cfcache + .hostname_cosmetic_resources(&resources, "example.com", false) + .hide_selectors; + let mut expected_hides = HashSet::new(); + expected_hides.insert("a[href=\"bad.com\"]".to_owned()); + assert_eq!(hide_selectors, expected_hides); + } + + #[test] + fn apply_to_tld() { + use crate::resources::ResourceType; + + // toolforge.org and github.io are examples of TLDs with multiple segments. These rules + // should still be parsed correctly and applied on corresponding subdomains. 
+ let rules = vec![ + "toolforge.org##+js(abort-on-property-read, noAdBlockers)", + "github.io##div.adToBlock", + ]; + let cfcache = CosmeticFilterCache::from_rules( + rules + .iter() + .map(|r| CosmeticFilter::parse(r, false, Default::default()).unwrap()) + .collect::>(), + ); + let resources = ResourceStorage::from_resources([ + Resource { + name: "abort-on-property-read.js".into(), + aliases: vec!["aopr".to_string()], + kind: ResourceType::Template, + content: base64::encode("abort-on-property-read.js, {{1}}"), + dependencies: vec![], + permission: Default::default(), + } + ]); + + let injected_script = cfcache + .hostname_cosmetic_resources(&resources, "antonok.toolforge.org", false) + .injected_script; + assert_eq!( + injected_script, + "try {\nabort-on-property-read.js, noAdBlockers\n} catch ( e ) { }\n" + ); + + let hide_selectors = cfcache + .hostname_cosmetic_resources(&resources, "antonok.github.io", false) + .hide_selectors; + let mut expected_hides = HashSet::new(); + expected_hides.insert("div.adToBlock".to_owned()); + assert_eq!(hide_selectors, expected_hides); + } +} diff --git a/third_party/rust/adblock/v0_7/crate/src/data_format/mod.rs b/third_party/rust/adblock/v0_8/crate/src/data_format/mod.rs similarity index 52% rename from third_party/rust/adblock/v0_7/crate/src/data_format/mod.rs rename to third_party/rust/adblock/v0_8/crate/src/data_format/mod.rs index 1c54ca52b92f..e9f26aceac31 100644 --- a/third_party/rust/adblock/v0_7/crate/src/data_format/mod.rs +++ b/third_party/rust/adblock/v0_8/crate/src/data_format/mod.rs @@ -5,10 +5,9 @@ //! serialization/deserialization implementations and can automatically dispatch to the appropriate //! one. 
-mod legacy; mod v0; -pub mod utils; +pub(crate) mod utils; use crate::blocker::Blocker; use crate::cosmetic_filter_cache::CosmeticFilterCache; @@ -23,31 +22,12 @@ const ADBLOCK_RUST_DAT_MAGIC: [u8; 4] = [0xd1, 0xd9, 0x3a, 0xaf]; /// Note that this does not implement `Serialize` directly, as it is composed of parts which must /// be serialized independently. Instead, use the `serialize` method. pub(crate) enum SerializeFormat<'a> { - Legacy(legacy::SerializeFormat<'a>), V0(v0::SerializeFormat<'a>), } #[derive(Debug)] pub enum SerializationError { RmpSerdeError(rmp_serde::encode::Error), - GzError(std::io::Error), -} - -/// Since two different versions of `rmp-serde` are being used, errors must be converted to a -/// single implementation. -impl From for SerializationError { - fn from(e: rmp_serde_legacy::encode::Error) -> Self { - use rmp_serde::encode::Error as EncodeError; - use rmp_serde_legacy::encode::Error as LegacyEncodeError; - - let new_error = match e { - LegacyEncodeError::InvalidValueWrite(e) => EncodeError::InvalidValueWrite(e), - LegacyEncodeError::UnknownLength => EncodeError::UnknownLength, - LegacyEncodeError::DepthLimitExceeded => EncodeError::DepthLimitExceeded, - LegacyEncodeError::Syntax(e) => EncodeError::Syntax(e), - }; - Self::RmpSerdeError(new_error) - } } impl From for SerializationError { @@ -56,24 +36,13 @@ impl From for SerializationError { } } -impl From for SerializationError { - fn from(e: std::io::Error) -> Self { - Self::GzError(e) - } -} - impl<'a> SerializeFormat<'a> { - pub(crate) fn build(blocker: &'a Blocker, cfc: &'a CosmeticFilterCache, legacy: bool) -> Self { - if legacy { - Self::Legacy(legacy::SerializeFormat::from((blocker, cfc))) - } else { - Self::V0(v0::SerializeFormat::from((blocker, cfc))) - } + pub(crate) fn build(blocker: &'a Blocker, cfc: &'a CosmeticFilterCache) -> Self { + Self::V0(v0::SerializeFormat::from((blocker, cfc))) } pub(crate) fn serialize(&self) -> Result, SerializationError> { match self { - 
Self::Legacy(v) => v.serialize(), Self::V0(v) => v.serialize(), } } @@ -85,7 +54,6 @@ impl<'a> SerializeFormat<'a> { /// Note that this does not implement `Deserialize` directly, as it is composed of parts which must /// be deserialized independently. Instead, use the `deserialize` method. pub(crate) enum DeserializeFormat { - Legacy(legacy::DeserializeFormat), V0(v0::DeserializeFormat), } @@ -94,28 +62,10 @@ pub enum DeserializationError { RmpSerdeError(rmp_serde::decode::Error), UnsupportedFormatVersion(u8), NoHeaderFound, -} - -/// Since two different versions of `rmp-serde` are being used, errors must be converted to a -/// single implementation. -impl From for DeserializationError { - fn from(e: rmp_serde_legacy::decode::Error) -> Self { - use rmp_serde::decode::Error as DecodeError; - use rmp_serde_legacy::decode::Error as LegacyDecodeError; - - let new_error = match e { - LegacyDecodeError::InvalidMarkerRead(e) => DecodeError::InvalidMarkerRead(e), - LegacyDecodeError::InvalidDataRead(e) => DecodeError::InvalidDataRead(e), - LegacyDecodeError::TypeMismatch(m) => DecodeError::TypeMismatch(m), - LegacyDecodeError::OutOfRange => DecodeError::OutOfRange, - LegacyDecodeError::LengthMismatch(l) => DecodeError::LengthMismatch(l), - LegacyDecodeError::Uncategorized(e) => DecodeError::Uncategorized(e), - LegacyDecodeError::Syntax(e) => DecodeError::Syntax(e), - LegacyDecodeError::Utf8Error(e) => DecodeError::Utf8Error(e), - LegacyDecodeError::DepthLimitExceeded => DecodeError::DepthLimitExceeded, - }; - Self::RmpSerdeError(new_error) - } + /// Support for the legacy gzip-compressed data format was removed in version 0.8.0 of this + /// crate. If you still need it for some reason, you can convert it using 0.7.x by + /// deserializing and then reserializing it into the newer V0 format. 
+ LegacyFormatNoLongerSupported, } impl From for DeserializationError { @@ -127,26 +77,23 @@ impl From for DeserializationError { impl DeserializeFormat { pub(crate) fn build(self) -> (Blocker, CosmeticFilterCache) { match self { - Self::Legacy(v) => v.into(), Self::V0(v) => v.into(), } } pub(crate) fn deserialize(serialized: &[u8]) -> Result { - /// adblock-rust has always used flate2 1.0.x for the legacy format, which has never - /// changed the header sequence from these 10 bits when the GzEncoder is left uncustomized. + /// adblock-rust's legacy DAT format has always used flate2 1.0.x, which has never changed + /// the header sequence from these 10 bits when the GzEncoder is left uncustomized. const FLATE2_GZ_HEADER_BYTES: [u8; 10] = [31, 139, 8, 0, 0, 0, 0, 0, 0, 255]; - if serialized.starts_with(&FLATE2_GZ_HEADER_BYTES) { - Ok(Self::Legacy(legacy::DeserializeFormat::deserialize( - serialized, - )?)) - } else if serialized.starts_with(&ADBLOCK_RUST_DAT_MAGIC) { + if serialized.starts_with(&ADBLOCK_RUST_DAT_MAGIC) { let version = serialized[ADBLOCK_RUST_DAT_MAGIC.len()]; match version { 0 => Ok(Self::V0(v0::DeserializeFormat::deserialize(serialized)?)), v => Err(DeserializationError::UnsupportedFormatVersion(v)), } + } else if serialized.starts_with(&FLATE2_GZ_HEADER_BYTES) { + Err(DeserializationError::LegacyFormatNoLongerSupported) } else { Err(DeserializationError::NoHeaderFound) } diff --git a/third_party/rust/adblock/v0_7/crate/src/data_format/utils.rs b/third_party/rust/adblock/v0_8/crate/src/data_format/utils.rs similarity index 100% rename from third_party/rust/adblock/v0_7/crate/src/data_format/utils.rs rename to third_party/rust/adblock/v0_8/crate/src/data_format/utils.rs diff --git a/third_party/rust/adblock/v0_7/crate/src/data_format/v0.rs b/third_party/rust/adblock/v0_8/crate/src/data_format/v0.rs similarity index 65% rename from third_party/rust/adblock/v0_7/crate/src/data_format/v0.rs rename to 
third_party/rust/adblock/v0_8/crate/src/data_format/v0.rs index 434362a2887c..d86c50bc70ec 100644 --- a/third_party/rust/adblock/v0_7/crate/src/data_format/v0.rs +++ b/third_party/rust/adblock/v0_8/crate/src/data_format/v0.rs @@ -12,14 +12,145 @@ use serde::{Deserialize, Serialize}; use crate::blocker::{Blocker, NetworkFilterList}; use crate::cosmetic_filter_cache::{CosmeticFilterCache, HostnameRuleDb}; use crate::filters::network::NetworkFilter; -use crate::resources::{RedirectResourceStorage, ScriptletResourceStorage}; +use crate::utils::Hash; use super::utils::{stabilize_hashmap_serialization, stabilize_hashset_serialization}; use super::{DeserializationError, SerializationError}; +/// Each variant describes a single rule that is specific to a particular hostname. +#[derive(Clone, Debug, Deserialize, Serialize)] +enum LegacySpecificFilterType { + Hide(String), + Unhide(String), + Style(String, String), + UnhideStyle(String, String), + ScriptInject(String), + UnhideScriptInject(String), +} + +#[derive(Deserialize, Serialize, Default)] +pub(crate) struct LegacyHostnameRuleDb { + #[serde(serialize_with = "crate::data_format::utils::stabilize_hashmap_serialization")] + db: HashMap>, +} + +impl From<&HostnameRuleDb> for LegacyHostnameRuleDb { + fn from(v: &HostnameRuleDb) -> Self { + let mut db = HashMap::>::new(); + for (hash, bin) in v.hide.0.iter() { + for f in bin { + db.entry(*hash) + .and_modify(|v| v.push(LegacySpecificFilterType::Hide(f.to_owned()))) + .or_insert_with(|| vec![LegacySpecificFilterType::Hide(f.to_owned())]); + } + } + for (hash, bin) in v.unhide.0.iter() { + for f in bin { + db.entry(*hash) + .and_modify(|v| v.push(LegacySpecificFilterType::Unhide(f.to_owned()))) + .or_insert_with(|| vec![LegacySpecificFilterType::Unhide(f.to_owned())]); + } + } + for (hash, bin) in v.inject_script.0.iter() { + for (f, _mask) in bin { + db.entry(*hash) + .and_modify(|v| v.push(LegacySpecificFilterType::ScriptInject(f.to_owned()))) + .or_insert_with(|| 
vec![LegacySpecificFilterType::ScriptInject(f.to_owned())]); + } + } + for (hash, bin) in v.uninject_script.0.iter() { + for f in bin { + db.entry(*hash) + .and_modify(|v| v.push(LegacySpecificFilterType::UnhideScriptInject(f.to_owned()))) + .or_insert_with(|| vec![LegacySpecificFilterType::UnhideScriptInject(f.to_owned())]); + } + } + for (hash, bin) in v.style.0.iter() { + for f in bin { + db.entry(*hash) + .and_modify(|v| v.push(LegacySpecificFilterType::Style(f.0.to_owned(), f.1.to_owned()))) + .or_insert_with(|| vec![LegacySpecificFilterType::Style(f.0.to_owned(), f.1.to_owned())]); + } + } + for (hash, bin) in v.unstyle.0.iter() { + for f in bin { + db.entry(*hash) + .and_modify(|v| v.push(LegacySpecificFilterType::UnhideStyle(f.0.to_owned(), f.1.to_owned()))) + .or_insert_with(|| vec![LegacySpecificFilterType::UnhideStyle(f.0.to_owned(), f.1.to_owned())]); + } + } + LegacyHostnameRuleDb { + db, + } + } +} + +impl Into for LegacyHostnameRuleDb { + fn into(self) -> HostnameRuleDb { + use crate::cosmetic_filter_cache::HostnameFilterBin; + + let mut hide = HostnameFilterBin::default(); + let mut unhide = HostnameFilterBin::default(); + let mut style = HostnameFilterBin::default(); + let mut unstyle = HostnameFilterBin::default(); + let mut inject_script = HostnameFilterBin::default(); + let mut uninject_script = HostnameFilterBin::default(); + + for (hash, bin) in self.db.into_iter() { + for rule in bin.into_iter() { + match rule { + LegacySpecificFilterType::Hide(s) => hide.insert(&hash, s), + LegacySpecificFilterType::Unhide(s) => unhide.insert(&hash, s), + LegacySpecificFilterType::Style(s, st) => style.insert(&hash, (s, st)), + LegacySpecificFilterType::UnhideStyle(s, st) => unstyle.insert(&hash, (s, st)), + LegacySpecificFilterType::ScriptInject(s) => inject_script.insert(&hash, (s, Default::default())), + LegacySpecificFilterType::UnhideScriptInject(s) => uninject_script.insert(&hash, s), + } + } + } + HostnameRuleDb { + hide, + unhide, + inject_script, + 
uninject_script, + remove: HostnameFilterBin::default(), + unremove: HostnameFilterBin::default(), + style, + unstyle, + remove_attr: HostnameFilterBin::default(), + unremove_attr: HostnameFilterBin::default(), + remove_class: HostnameFilterBin::default(), + unremove_class: HostnameFilterBin::default(), + } + } +} + +#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] +pub(crate) struct LegacyRedirectResource { + pub content_type: String, + pub data: String, +} + +#[derive(Serialize, Deserialize, Debug, PartialEq, Default)] +pub(crate) struct LegacyRedirectResourceStorage { + #[serde(serialize_with = "crate::data_format::utils::stabilize_hashmap_serialization")] + pub resources: HashMap, +} + +#[derive(Clone, Deserialize, Serialize)] +pub(crate) struct LegacyScriptletResource { + scriptlet: String, +} + +#[derive(Default, Deserialize, Serialize)] +pub(crate) struct LegacyScriptletResourceStorage { + #[serde(serialize_with = "crate::data_format::utils::stabilize_hashmap_serialization")] + resources: HashMap, +} + /// `_bug` is no longer used, and is removed from future format versions. 
#[derive(Debug, Clone, Serialize)] -pub struct NetworkFilterV0SerializeFmt<'a> { +struct NetworkFilterV0SerializeFmt<'a> { mask: &'a crate::filters::network::NetworkFilterMask, filter: &'a crate::filters::network::FilterPart, opt_domains: &'a Option>, @@ -127,7 +258,7 @@ pub(crate) struct SerializeFormat<'a> { enable_optimizations: bool, - resources: &'a RedirectResourceStorage, + resources: LegacyRedirectResourceStorage, #[serde(serialize_with = "stabilize_hashset_serialization")] simple_class_rules: &'a HashSet, @@ -138,12 +269,12 @@ pub(crate) struct SerializeFormat<'a> { #[serde(serialize_with = "stabilize_hashmap_serialization")] complex_id_rules: &'a HashMap>, - specific_rules: &'a HostnameRuleDb, + specific_rules: LegacyHostnameRuleDb, #[serde(serialize_with = "stabilize_hashset_serialization")] misc_generic_selectors: &'a HashSet, - scriptlets: &'a ScriptletResourceStorage, + scriptlets: LegacyScriptletResourceStorage, } impl<'a> SerializeFormat<'a> { @@ -231,18 +362,18 @@ pub(crate) struct DeserializeFormat { enable_optimizations: bool, - resources: RedirectResourceStorage, + _resources: LegacyRedirectResourceStorage, simple_class_rules: HashSet, simple_id_rules: HashSet, complex_class_rules: HashMap>, complex_id_rules: HashMap>, - specific_rules: HostnameRuleDb, + specific_rules: LegacyHostnameRuleDb, misc_generic_selectors: HashSet, - scriptlets: ScriptletResourceStorage, + _scriptlets: LegacyScriptletResourceStorage, } impl DeserializeFormat { @@ -271,18 +402,18 @@ impl<'a> From<(&'a Blocker, &'a CosmeticFilterCache)> for SerializeFormat<'a> { enable_optimizations: blocker.enable_optimizations, - resources: &blocker.resources, + resources: LegacyRedirectResourceStorage::default(), simple_class_rules: &cfc.simple_class_rules, simple_id_rules: &cfc.simple_id_rules, complex_class_rules: &cfc.complex_class_rules, complex_id_rules: &cfc.complex_id_rules, - specific_rules: &cfc.specific_rules, + specific_rules: (&cfc.specific_rules).into(), 
misc_generic_selectors: &cfc.misc_generic_selectors, - scriptlets: &cfc.scriptlets, + scriptlets: LegacyScriptletResourceStorage::default(), } } } @@ -305,7 +436,6 @@ impl From for (Blocker, CosmeticFilterCache) { enable_optimizations: v.enable_optimizations, - resources: v.resources, #[cfg(feature = "object-pooling")] pool: Default::default(), regex_manager: Default::default(), @@ -316,11 +446,9 @@ impl From for (Blocker, CosmeticFilterCache) { complex_class_rules: v.complex_class_rules, complex_id_rules: v.complex_id_rules, - specific_rules: v.specific_rules, + specific_rules: v.specific_rules.into(), misc_generic_selectors: v.misc_generic_selectors, - - scriptlets: v.scriptlets, }, ) } diff --git a/third_party/rust/adblock/v0_8/crate/src/engine.rs b/third_party/rust/adblock/v0_8/crate/src/engine.rs new file mode 100644 index 000000000000..204f45529c59 --- /dev/null +++ b/third_party/rust/adblock/v0_8/crate/src/engine.rs @@ -0,0 +1,856 @@ +//! The adblock [`Engine`] is the primary interface for adblocking. + +use crate::blocker::{Blocker, BlockerOptions, BlockerResult}; +use crate::cosmetic_filter_cache::{CosmeticFilterCache, UrlSpecificResources}; +use crate::lists::{FilterSet, ParseOptions}; +use crate::regex_manager::RegexManagerDiscardPolicy; +use crate::request::Request; +use crate::resources::{Resource, ResourceStorage}; + +use std::collections::HashSet; + +/// Drives high-level blocking logic and is responsible for loading filter lists into an optimized +/// format that can be queried efficiently. +/// +/// For performance optimization reasons, the [`Engine`] is not designed to have rules added or +/// removed after its initial creation. Making changes to the rules loaded is accomplished by +/// creating a new engine to replace it. +/// +/// ## Usage +/// +/// ### Initialization +/// +/// You'll first want to combine all of your filter lists in a [`FilterSet`], which will parse list +/// header metadata. 
Once all lists have been composed together, you can call +/// [`Engine::from_filter_set`] to start using them for blocking. +/// +/// You may also want to supply certain assets for `$redirect` filters and `##+js(...)` scriptlet +/// injections. These are known as [`Resource`]s, and can be provided with +/// [`Engine::use_resources`]. See the [`crate::resources`] module for more information. +/// +/// ### Network blocking +/// +/// Use the [`Engine::check_network_request`] method to determine how to handle a network request. +/// +/// If you _only_ need network blocking, consider using a [`Blocker`] directly. +/// +/// ### Cosmetic filtering +/// +/// Call [`Engine::url_cosmetic_resources`] to determine what actions should be taken to prepare a +/// particular page before it starts loading. +/// +/// Once the page has been loaded, any new CSS classes or ids that appear on the page should be passed to +/// [`Engine::hidden_class_id_selectors`] on an ongoing basis to determine additional elements that +/// should be hidden dynamically. +pub struct Engine { + blocker: Blocker, + cosmetic_cache: CosmeticFilterCache, + resources: ResourceStorage, +} + +impl Default for Engine { + /// Equivalent to `Engine::new(true)`. + fn default() -> Self { + Self::new(true) + } +} + +impl Engine { + /// Creates a new adblocking `Engine`. `Engine`s created without rules should generally only be + /// used with deserialization. + /// - `optimize` specifies whether or not to attempt to compress the internal representation by + /// combining similar rules. + pub fn new(optimize: bool) -> Self { + let blocker_options = BlockerOptions { + enable_optimizations: optimize, + }; + + Self { + blocker: Blocker::new(vec![], &blocker_options), + cosmetic_cache: CosmeticFilterCache::new(), + resources: ResourceStorage::default(), + } + } + + /// Loads rules in a single format, enabling optimizations and discarding debug information. 
+ pub fn from_rules(rules: impl IntoIterator>, opts: ParseOptions) -> Self { + let mut filter_set = FilterSet::new(false); + filter_set.add_filters(rules, opts); + Self::from_filter_set(filter_set, true) + } + + /// Loads rules, enabling optimizations and including debug information. + pub fn from_rules_debug(rules: impl IntoIterator>, opts: ParseOptions) -> Self { + Self::from_rules_parametrised(rules, opts, true, true) + } + + pub fn from_rules_parametrised(filter_rules: impl IntoIterator>, opts: ParseOptions, debug: bool, optimize: bool) -> Self { + let mut filter_set = FilterSet::new(debug); + filter_set.add_filters(filter_rules, opts); + Self::from_filter_set(filter_set, optimize) + } + + /// Loads rules from the given `FilterSet`. It is recommended to use a `FilterSet` when adding + /// rules from multiple sources. + pub fn from_filter_set(set: FilterSet, optimize: bool) -> Self { + let FilterSet { network_filters, cosmetic_filters, .. } = set; + + let blocker_options = BlockerOptions { + enable_optimizations: optimize, + }; + + Self { + blocker: Blocker::new(network_filters, &blocker_options), + cosmetic_cache: CosmeticFilterCache::from_rules(cosmetic_filters), + resources: ResourceStorage::default(), + } + } + + /// Serializes the `Engine` into a binary format so that it can be quickly reloaded later. + pub fn serialize_raw(&self) -> Result, crate::data_format::SerializationError> { + use crate::data_format::SerializeFormat; + + let serialize_format = SerializeFormat::build(&self.blocker, &self.cosmetic_cache); + + serialize_format.serialize() + } + + /// Deserialize the `Engine` from the binary format generated by `Engine::serialize_raw`. The + /// method will automatically select the correct deserialization implementation. 
+ pub fn deserialize(&mut self, serialized: &[u8]) -> Result<(), crate::data_format::DeserializationError> { + use crate::data_format::DeserializeFormat; + let current_tags = self.blocker.tags_enabled(); + let deserialize_format = DeserializeFormat::deserialize(serialized)?; + let (blocker, cosmetic_cache) = deserialize_format.build(); + self.blocker = blocker; + self.blocker.use_tags(¤t_tags.iter().map(|s| &**s).collect::>()); + self.cosmetic_cache = cosmetic_cache; + Ok(()) + } + + /// Check if a request for a network resource from `url`, of type `request_type`, initiated by + /// `source_url`, should be blocked. + pub fn check_network_request(&self, request: &Request) -> BlockerResult { + self.blocker.check(request, &self.resources) + } + + pub fn check_network_request_subset( + &self, + request: &Request, + previously_matched_rule: bool, + force_check_exceptions: bool, + ) -> BlockerResult { + self.blocker.check_parameterised(request, &self.resources, previously_matched_rule, force_check_exceptions) + } + + /// Returns a string containing any additional CSP directives that should be added to this + /// request's response. Only applies to document and subdocument requests. + /// + /// If multiple policies are present from different rules, they will be joined by commas. + pub fn get_csp_directives( + &self, + request: &Request, + ) -> Option { + self.blocker.get_csp_directives(request) + } + + /// Sets this engine's tags to be _only_ the ones provided in `tags`. + /// + /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag` + /// option. + pub fn use_tags(&mut self, tags: &[&str]) { + self.blocker.use_tags(tags); + } + + /// Sets this engine's tags to additionally include the ones provided in `tags`. + /// + /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag` + /// option. 
+ pub fn enable_tags(&mut self, tags: &[&str]) { + self.blocker.enable_tags(tags); + } + + /// Sets this engine's tags to no longer include the ones provided in `tags`. + /// + /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag` + /// option. + pub fn disable_tags(&mut self, tags: &[&str]) { + self.blocker.disable_tags(tags); + } + + /// Checks if a given tag exists in this engine. + /// + /// Tags can be used to cheaply enable or disable network rules with a corresponding `$tag` + /// option. + pub fn tag_exists(&self, tag: &str) -> bool { + self.blocker.tags_enabled().contains(&tag.to_owned()) + } + + /// Sets this engine's resources to be _only_ the ones provided in `resources`. + pub fn use_resources(&mut self, resources: impl IntoIterator) { + self.resources = ResourceStorage::from_resources(resources); + } + + /// Sets this engine's resources to additionally include `resource`. + pub fn add_resource(&mut self, resource: Resource) -> Result<(), crate::resources::AddResourceError> { + self.resources.add_resource(resource) + } + + // Cosmetic filter functionality + + /// If any of the provided CSS classes or ids could cause a certain generic CSS hide rule + /// (i.e. `{ display: none !important; }`) to be required, this method will return a list of + /// CSS selectors corresponding to rules referencing those classes or ids, provided that the + /// corresponding rules are not excepted. + /// + /// `exceptions` should be passed directly from `UrlSpecificResources`. + pub fn hidden_class_id_selectors(&self, classes: impl IntoIterator>, ids: impl IntoIterator>, exceptions: &HashSet) -> Vec { + self.cosmetic_cache.hidden_class_id_selectors(classes, ids, exceptions) + } + + /// Returns a set of cosmetic filter resources required for a particular url. 
Once this has + /// been called, all CSS ids and classes on a page should be passed to + /// `hidden_class_id_selectors` to obtain any stylesheets consisting of generic rules (if the + /// returned `generichide` value is false). + pub fn url_cosmetic_resources(&self, url: &str) -> UrlSpecificResources { + let request = Request::new(url, url, "document"); + if request.is_err() { + return UrlSpecificResources::empty(); + } + let request = request.unwrap(); + + let generichide = self.blocker.check_generic_hide(&request); + self.cosmetic_cache.hostname_cosmetic_resources(&self.resources, &request.hostname, generichide) + } + + pub fn set_regex_discard_policy( + &mut self, + new_discard_policy: RegexManagerDiscardPolicy + ) { + self.blocker.set_regex_discard_policy(new_discard_policy); + } + + #[cfg(feature = "regex-debug-info")] + pub fn discard_regex(&mut self, regex_id: u64) { + self.blocker.discard_regex(regex_id); + } + + #[cfg(feature = "regex-debug-info")] + pub fn get_regex_debug_info(&self) -> crate::regex_manager::RegexDebugInfo { + self.blocker.get_regex_debug_info() + } +} + +/// Static assertions for `Engine: Send + Sync` traits. 
+#[cfg(not(any(feature = "object-pooling", feature = "unsync-regex-caching")))] +fn _assertions() { + fn _assert_send() {} + fn _assert_sync() {} + + _assert_send::(); + _assert_sync::(); +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::resources::MimeType; + use crate::lists::FilterFormat; + + #[test] + fn tags_enable_adds_tags() { + let filters = [ + "adv$tag=stuff", + "somelongpath/test$tag=stuff", + "||brianbondy.com/$tag=brian", + "||brave.com$tag=brian", + ]; + let url_results = [ + ("http://example.com/advert.html", true), + ("http://example.com/somelongpath/test/2.html", true), + ("https://brianbondy.com/about", true), + ("https://brave.com/about", true), + ]; + + let mut engine = Engine::from_rules(&filters, Default::default()); + engine.enable_tags(&["stuff"]); + engine.enable_tags(&["brian"]); + + url_results.into_iter().for_each(|(url, expected_result)| { + let request = Request::new(&url, "", "").unwrap(); + let matched_rule = engine.check_network_request(&request); + if expected_result { + assert!(matched_rule.matched, "Expected match for {}", url); + } else { + assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", url, matched_rule.filter); + } + }); + } + + #[test] + fn tags_disable_works() { + let filters = [ + "adv$tag=stuff", + "somelongpath/test$tag=stuff", + "||brianbondy.com/$tag=brian", + "||brave.com$tag=brian", + ]; + let url_results = [ + ("http://example.com/advert.html", false), + ("http://example.com/somelongpath/test/2.html", false), + ("https://brianbondy.com/about", true), + ("https://brave.com/about", true), + ]; + + let mut engine = Engine::from_rules(&filters, Default::default()); + engine.enable_tags(&["brian", "stuff"]); + engine.disable_tags(&["stuff"]); + + url_results.into_iter().for_each(|(url, expected_result)| { + let request = Request::new(&url, "", "").unwrap(); + let matched_rule = engine.check_network_request(&request); + if expected_result { + assert!(matched_rule.matched, "Expected 
match for {}", url); + } else { + assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", url, matched_rule.filter); + } + }); + } + + #[test] + fn exception_tags_inactive_by_default() { + let filters = [ + "adv", + "||brianbondy.com/$tag=brian", + "@@||brianbondy.com/$tag=brian", + ]; + let url_results = [ + ("http://example.com/advert.html", true), + ("https://brianbondy.com/about", false), + ("https://brianbondy.com/advert", true), + ]; + + let engine = Engine::from_rules(&filters, Default::default()); + + url_results.into_iter().for_each(|(url, expected_result)| { + let request = Request::new(&url, "", "").unwrap(); + let matched_rule = engine.check_network_request(&request); + if expected_result { + assert!(matched_rule.matched, "Expected match for {}", url); + } else { + assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", url, matched_rule.filter); + } + }); + } + + #[test] + fn exception_tags_works() { + let filters = [ + "adv", + "||brianbondy.com/$tag=brian", + "@@||brianbondy.com/$tag=brian", + ]; + let url_results = [ + ("http://example.com/advert.html", true), + ("https://brianbondy.com/about", false), + ("https://brianbondy.com/advert", false), + ]; + + let mut engine = Engine::from_rules(&filters, Default::default()); + engine.enable_tags(&["brian", "stuff"]); + + url_results.into_iter().for_each(|(url, expected_result)| { + let request = Request::new(&url, "", "").unwrap(); + let matched_rule = engine.check_network_request(&request); + if expected_result { + assert!(matched_rule.matched, "Expected match for {}", url); + } else { + assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", url, matched_rule.filter); + } + }); + } + + #[test] + fn serialization_retains_tags() { + let filters = [ + "adv$tag=stuff", + "somelongpath/test$tag=stuff", + "||brianbondy.com/$tag=brian", + "||brave.com$tag=brian", + ]; + let url_results = [ + ("http://example.com/advert.html", true), + 
("http://example.com/somelongpath/test/2.html", true), + ("https://brianbondy.com/about", false), + ("https://brave.com/about", false), + ]; + + let mut engine = Engine::from_rules(&filters, Default::default()); + engine.enable_tags(&["stuff"]); + engine.enable_tags(&["brian"]); + let serialized = engine.serialize_raw().unwrap(); + let mut deserialized_engine = Engine::default(); + deserialized_engine.enable_tags(&["stuff"]); + deserialized_engine.deserialize(&serialized).unwrap(); + + url_results.into_iter().for_each(|(url, expected_result)| { + let request = Request::new(&url, "", "").unwrap(); + let matched_rule = deserialized_engine.check_network_request(&request); + if expected_result { + assert!(matched_rule.matched, "Expected match for {}", url); + } else { + assert!(!matched_rule.matched, "Expected no match for {}, matched with {:?}", url, matched_rule.filter); + } + }); + } + + #[test] + fn deserialization_backwards_compatible_plain() { + // deserialization_generate_simple(); + // assert!(false); + // converted from the legacy compressed format + let serialized = [209, 217, 58, 175, 0, 220, 0, 17, 145, 128, 145, 128, 145, 128, 145, 128, + 145, 128, 145, 129, 207, 202, 167, 36, 217, 43, 56, 97, 176, 145, 157, 145, 206, 0, 3, + 31, 255, 129, 1, 169, 97, 100, 45, 98, 97, 110, 110, 101, 114, 192, 192, 192, 192, 192, + 192, 192, 192, 207, 186, 136, 69, 13, 115, 187, 170, 226, 192, 192, 145, 128, 144, 195, + 145, 128, 144, 144, 128, 128, 145, 128, 144, 145, 128]; + let mut deserialized_engine = Engine::default(); + deserialized_engine.deserialize(&serialized).unwrap(); + + let url = "http://example.com/ad-banner.gif"; + let request = Request::new(&url, "", "").unwrap(); + let matched_rule = deserialized_engine.check_network_request(&request); + assert!(matched_rule.matched, "Expected match for {}", url); + } + + #[test] + fn deserialization_backwards_compatible_tags() { + // deserialization_generate_tags(); + // assert!(false); + // converted from the legacy 
compressed format + let serialized = [209, 217, 58, 175, 0, 220, 0, 17, 145, 128, 145, 128, 145, 128, 145, 128, + 145, 128, 145, 128, 145, 128, 145, 157, 145, 206, 0, 3, 31, 255, 129, 1, 169, 97, 100, + 45, 98, 97, 110, 110, 101, 114, 192, 192, 192, 192, 192, 192, 163, 97, 98, 99, 192, + 207, 126, 212, 53, 83, 113, 159, 143, 134, 192, 192, 195, 145, 128, 144, 144, 128, 128, + 145, 128, 144, 145, 128]; + let mut deserialized_engine = Engine::default(); + + deserialized_engine.enable_tags(&[]); + deserialized_engine.deserialize(&serialized).unwrap(); + let url = "http://example.com/ad-banner.gif"; + let request = Request::new(&url, "", "").unwrap(); + let matched_rule = deserialized_engine.check_network_request(&request); + assert!(!matched_rule.matched, "Expected NO match for {}", url); + + deserialized_engine.enable_tags(&["abc"]); + deserialized_engine.deserialize(&serialized).unwrap(); + + let url = "http://example.com/ad-banner.gif"; + let request = Request::new(&url, "", "").unwrap(); + let matched_rule = deserialized_engine.check_network_request(&request); + assert!(matched_rule.matched, "Expected match for {}", url); + } + + #[test] + fn deserialization_generate_simple() { + let mut engine = Engine::from_rules(&[ + "ad-banner", + ], Default::default()); + let serialized = engine.serialize_raw().unwrap(); + println!("Engine serialized: {:?}", serialized); + engine.deserialize(&serialized).unwrap(); + } + + #[test] + fn deserialization_generate_tags() { + let mut engine = Engine::from_rules(&[ + "ad-banner$tag=abc", + ], Default::default()); + engine.use_tags(&["abc"]); + let serialized = engine.serialize_raw().unwrap(); + println!("Engine serialized: {:?}", serialized); + engine.deserialize(&serialized).unwrap(); + } + + #[test] + fn deserialization_generate_resources() { + let mut engine = Engine::from_rules(&[ + "ad-banner$redirect=nooptext", + ], Default::default()); + + engine.use_resources([ + Resource::simple("nooptext", MimeType::TextPlain, ""), + 
Resource::simple("noopcss", MimeType::TextCss, ""), + ]); + + let serialized = engine.serialize_raw().unwrap(); + println!("Engine serialized: {:?}", serialized); + engine.deserialize(&serialized).unwrap(); + } + + #[test] + fn redirect_resource_insertion_works() { + let mut engine = Engine::from_rules(&[ + "ad-banner$redirect=nooptext", + "script.js$redirect=noop.js", + ], Default::default()); + + let script = r#" +(function() { + ; +})(); + + "#; + let mut resources = [ + Resource::simple("nooptext", MimeType::TextPlain, ""), + Resource::simple("noopjs", MimeType::ApplicationJavascript, script), + ]; + resources[1].aliases.push("noop.js".to_string()); + engine.use_resources(resources); + + let url = "http://example.com/ad-banner.gif"; + let request = Request::new(url, "", "").unwrap(); + let matched_rule = engine.check_network_request(&request); + assert!(matched_rule.matched, "Expected match for {}", url); + assert_eq!(matched_rule.redirect, Some("data:text/plain;base64,".to_owned()), "Expected redirect to contain resource"); + + let url = "http://example.com/script.js"; + let request = Request::new(url, "", "").unwrap(); + let matched_rule = engine.check_network_request(&request); + assert!(matched_rule.matched, "Expected match for {}", url); + assert_eq!(matched_rule.redirect, Some(format!("data:application/javascript;base64,{}", base64::encode(format!("{}", script)))), "Expected redirect to contain resource"); + } + + #[test] + fn document() { + let filters = [ + "||example.com$document", + "@@||sub.example.com$document", + ]; + + let engine = Engine::from_rules_debug(&filters, Default::default()); + + assert!(engine.check_network_request(&Request::new("https://example.com", "https://example.com", "document").unwrap()).matched); + assert!(!engine.check_network_request(&Request::new("https://example.com", "https://example.com", "script").unwrap()).matched); + assert!(engine.check_network_request(&Request::new("https://sub.example.com", 
"https://sub.example.com", "document").unwrap()).exception.is_some()); + } + + #[test] + fn implicit_all() { + { + let engine = Engine::from_rules_debug(["||example.com^"], Default::default()); + assert!(engine.check_network_request(&Request::new("https://example.com", "https://example.com", "document").unwrap()).matched); + } + { + let engine = Engine::from_rules_debug(["||example.com^$first-party"], Default::default()); + assert!(engine.check_network_request(&Request::new("https://example.com", "https://example.com", "document").unwrap()).matched); + } + { + let engine = Engine::from_rules_debug(["||example.com^$script"], Default::default()); + assert!(!engine.check_network_request(&Request::new("https://example.com", "https://example.com", "document").unwrap()).matched); + } + { + let engine = Engine::from_rules_debug(["||example.com^$~script"], Default::default()); + assert!(!engine.check_network_request(&Request::new("https://example.com", "https://example.com", "document").unwrap()).matched); + } + { + let engine = Engine::from_rules_debug(["||example.com^$document", "@@||example.com^$generichide"], Default::default()); + assert!(engine.check_network_request(&Request::new("https://example.com", "https://example.com", "document").unwrap()).matched); + } + { + let engine = Engine::from_rules_debug(["example.com"], ParseOptions { format: FilterFormat::Hosts, ..Default::default() }); + assert!(engine.check_network_request(&Request::new("https://example.com", "https://example.com", "document").unwrap()).matched); + } + { + let engine = Engine::from_rules_debug(["||example.com/path"], Default::default()); + assert!(!engine.check_network_request(&Request::new("https://example.com/path", "https://example.com/path", "document").unwrap()).matched); + } + { + let engine = Engine::from_rules_debug(["||example.com/path^"], Default::default()); + assert!(!engine.check_network_request(&Request::new("https://example.com/path", "https://example.com/path", 
"document").unwrap()).matched); + } + } + + #[test] + fn generichide() { + let filters = [ + "##.donotblock", + "##a[href=\"generic.com\"]", + + "@@||example.com$generichide", + "example.com##.block", + + "@@||example2.com/test.html$generichide", + "example2.com##.block", + ]; + let url_results = [ + ("https://example.com", vec![".block"], true), + ("https://example.com/test.html", vec![".block"], true), + ("https://example2.com", vec![".block", "a[href=\"generic.com\"]"], false), + ("https://example2.com/test.html", vec![".block"], true), + ]; + + let engine = Engine::from_rules(&filters, Default::default()); + + url_results.into_iter().for_each(|(url, expected_result, expected_generichide)| { + let result = engine.url_cosmetic_resources(url); + assert_eq!(result.hide_selectors, expected_result.iter().map(|s| s.to_string()).collect::>()); + assert_eq!(result.generichide, expected_generichide); + }); + } + + #[test] + fn important_redirect() { + let mut filter_set = FilterSet::new(true); + filter_set.add_filters([ + "||addthis.com^$important,3p,domain=~missingkids.com|~missingkids.org|~sainsburys.jobs|~sitecore.com|~amd.com", + "||addthis.com/*/addthis_widget.js$script,redirect=addthis.com/addthis_widget.js", + ], Default::default()); + let mut engine = Engine::from_filter_set(filter_set, false); + + engine.add_resource( + Resource::simple("addthis.com/addthis_widget.js", MimeType::ApplicationJavascript, "window.addthis = undefined"), + ).unwrap(); + + let request = Request::new("https://s7.addthis.com/js/250/addthis_widget.js?pub=resto", "https://www.rhmodern.com/catalog/product/product.jsp?productId=prod14970086&categoryId=cat7150028", "script").unwrap(); + let result = engine.check_network_request(&request); + + assert!(result.redirect.is_some()); + } + + #[test] + fn check_match_case_regex_filtering() { + { + // match case without regex is discarded + let engine = Engine::from_rules_debug(["ad.png$match-case"], Default::default()); + let request = 
Request::new("https://example.com/ad.png", "https://example.com", "image").unwrap(); + assert!(!engine.check_network_request(&request).matched); + } + { + // /^https:\/\/[0-9a-z]{3,}\.[-a-z]{10,}\.(?:li[fv]e|top|xyz)\/[a-z]{8}\/\?utm_campaign=\w{40,}/$doc,match-case,domain=life|live|top|xyz + let engine = Engine::from_rules_debug([r#"/^https:\/\/[0-9a-z]{3,}\.[-a-z]{10,}\.(?:li[fv]e|top|xyz)\/[a-z]{8}\/\?utm_campaign=\w{40,}/$doc,match-case,domain=life|live|top|xyz"#], Default::default()); + let request = Request::new("https://www.exampleaaa.xyz/testtest/?utm_campaign=aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd", "https://www.exampleaaa.xyz/testtest/?utm_campaign=aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd", "document").unwrap(); + assert!(engine.check_network_request(&request).matched); + } + // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 + /*{ + // /^https?:\/\/((?!www)[a-z]{3,}|\d{2})?\.?[-0-9a-z]{6,}\.[a-z]{2,6}\/(?:[a-z]{6,8}\/)?\/?\?u=[0-9a-z]{7}&o=[0-9a-z]{7}/$doc,frame,match-case,domain=buzz|com|de|fun|guru|info|life|live|mobi|online|pw|site|space|top|us|xyz + let engine = Engine::from_rules_debug([r#"/^https?:\/\/((?!www)[a-z]{3,}|\d{2})?\.?[-0-9a-z]{6,}\.[a-z]{2,6}\/(?:[a-z]{6,8}\/)?\/?\?u=[0-9a-z]{7}&o=[0-9a-z]{7}/$doc,frame,match-case,domain=buzz|com|de|fun|guru|info|life|live|mobi|online|pw|site|space|top|us|xyz"#], Default::default()); + let request = Request::new("https://example.com/aaaaaa/?u=aaaaaaa&o=bbbbbbb", + "https://example.com/aaaaaa/?u=aaaaaaa&o=bbbbbbb", + "document").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 + /*{ + // 
/^https:\/\/(?:www\d\.)?[-a-z]{6,}\.(?:com|info|net|org)\/(?=[-_a-zA-Z]{0,42}\d)(?=[-_0-9a-z]{0,42}[A-Z])[-_0-9a-zA-Z]{43}\/\?cid=[-_0-9a-zA-Z]{16,36}(?:&qs\d=\S+)?&sid=[_0-9a-f]{1,32}$/$doc,match-case,domain=com|info|net|org + let engine = Engine::from_rules_debug([r#"/^https:\/\/(?:www\d\.)?[-a-z]{6,}\.(?:com|info|net|org)\/(?=[-_a-zA-Z]{0,42}\d)(?=[-_0-9a-z]{0,42}[A-Z])[-_0-9a-zA-Z]{43}\/\?cid=[-_0-9a-zA-Z]{16,36}(?:&qs\d=\S+)?&sid=[_0-9a-f]{1,32}$/$doc,match-case,domain=com|info|net|org"#], Default::default()); + let request = Request::new("https://www3.example.com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddAA5/?cid=aaaaaaaaaabbbbbb&qs5=\n&sid=a", + "https://www3.example.com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddAA5/?cid=aaaaaaaaaabbbbbb&qs5=\n&sid=a", + "document").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 + /*{ + // /^https:\/\/(?:www\d\.)?[-a-z]{6,}\.(?:com|info|net|org)\/(?=[-_a-zA-Z]{0,42}\d)(?=[-_0-9a-z]{0,42}[A-Z])[-_0-9a-zA-Z]{43}\/\?sid=[_0-9a-f]{1,32}(?:&qs\d=\S+)?&cid=[-_0-9a-zA-Z]{16,36}$/$doc,match-case,domain=com|info|net|org + let engine = Engine::from_rules_debug([r#"/^https:\/\/(?:www\d\.)?[-a-z]{6,}\.(?:com|info|net|org)\/(?=[-_a-zA-Z]{0,42}\d)(?=[-_0-9a-z]{0,42}[A-Z])[-_0-9a-zA-Z]{43}\/\?cid=[-_0-9a-zA-Z]{16,36}(?:&qs\d=\S+)?&sid=[_0-9a-f]{1,32}$/$doc,match-case,domain=com|info|net|org"#], Default::default()); + let request = Request::new("https://www3.example.com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddAA5/?sid=1&qs1=\n&cid=aaaaaaaaaabbbbbb", + "https://www3.example.com/aaaaaaaaaabbbbbbbbbbccccccccccddddddddddAA5/?sid=1&qs1=\n&cid=aaaaaaaaaabbbbbb", + "document").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + { + // /^http:\/\/[a-z]{5}\.[a-z]{5}\.com\/[a-z]{10}\.apk$/$doc,match-case,domain=com + let engine = 
Engine::from_rules_debug([r#"/^http:\/\/[a-z]{5}\.[a-z]{5}\.com\/[a-z]{10}\.apk$/$doc,match-case,domain=com"#], Default::default()); + let request = Request::new("http://abcde.abcde.com/aaaaabbbbb.apk", "http://abcde.abcde.com/aaaaabbbbb.apk", "document").unwrap(); + assert!(engine.check_network_request(&request).matched); + } + // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 + /*{ + // /\/[A-Z]\/[-0-9a-z]{5,}\.com\/(?:[0-9a-f]{2}\/){3}[0-9a-f]{32}\.js$/$script,1p,match-case + let engine = Engine::from_rules_debug([r#"/\/[A-Z]\/[-0-9a-z]{5,}\.com\/(?:[0-9a-f]{2}\/){3}[0-9a-f]{32}\.js$/$script,1p,match-case"#], Default::default()); + let request = Request::new("/A/aaaaa.com/aa/bb/cc/aaaaaaaabbbbbbbbccccccccdddddddd.js", + "/A/aaaaa.com/aa/bb/cc/aaaaaaaabbbbbbbbccccccccdddddddd.js", + "script").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 + /*{ + // /^https?:\/\/(?:[a-z]{2}\.)?[0-9a-z]{7,16}\.com\/[a-z](?=[a-z]{0,25}[0-9A-Z])[0-9a-zA-Z]{3,26}\/(?:[1-5]\d{4}|[3-9]\d{3})\??(?:_=\d+|v=\d)?$/$frame,script,xhr,popup,3p,match-case + let engine = Engine::from_rules_debug([r#"/^https?:\/\/(?:[a-z]{2}\.)?[0-9a-z]{7,16}\.com\/[a-z](?=[a-z]{0,25}[0-9A-Z])[0-9a-zA-Z]{3,26}\/(?:[1-5]\d{4}|[3-9]\d{3})\??(?:_=\d+|v=\d)?$/$frame,script,xhr,popup,3p,match-case"#], Default::default()); + let request = Request::new("https://aa.example.com/aAaaa/12222", + "https://aa.example.net/aAaaa/12222", + "frame").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 + /*{ + // 
/^https?:\/\/(?:[a-z]{2}\.)?[0-9a-z]{7,16}\.website\/[a-z](?=[a-z]{0,25}[0-9A-Z])[0-9a-zA-Z]{3,26}\/(?:[1-5]\d{4}|[3-9]\d{3})\??(?:_=\d+|v=\d)?$/$frame,script,xhr,popup,3p,match-case + let engine = Engine::from_rules_debug([r#"/^https?:\/\/(?:[a-z]{2}\.)?[0-9a-z]{7,16}\.website\/[a-z](?=[a-z]{0,25}[0-9A-Z])[0-9a-zA-Z]{3,26}\/(?:[1-5]\d{4}|[3-9]\d{3})\??(?:_=\d+|v=\d)?$/$frame,script,xhr,popup,3p,match-case"#], Default::default()); + let request = Request::new("https://aa.example.website/aAaaa/12222", + "https://aa.example.website/aAaaa/12222", + "frame").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 + /*{ + // /^https?:\/\/[a-z]{8,15}\.top(\/(?:\d{1,5}|0NaN|articles?|browse|index|movie|news|pages?|static|view|web|wiki)){1,4}(?:\.html|\/)$/$frame,3p,match-case + let engine = Engine::from_rules_debug([r#"/^https?:\/\/[a-z]{8,15}\.top(\/(?:\d{1,5}|0NaN|articles?|browse|index|movie|news|pages?|static|view|web|wiki)){1,4}(?:\.html|\/)$/$frame,3p,match-case"#], Default::default()); + let request = Request::new("https://examples.top/articles.html", + "https://examples.top/articles.html", + "frame").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + { + // /^https?:\/\/[a-z]{8,15}\.top\/[a-z]{4,}\.json$/$xhr,3p,match-case + let engine = Engine::from_rules_debug([r#"/^https?:\/\/[a-z]{8,15}\.top\/[a-z]{4,}\.json$/$xhr,3p,match-case"#], Default::default()); + let request = Request::new("https://examples.top/abcd.json", "https://examples.com/abcd.json", "xhr").unwrap(); + assert!(engine.check_network_request(&request).matched); + } + // fails - inferring unescaped `$` inside regex pattern + /*{ + // /^https?:\/\/[a-z]{8,15}\.top\/[-a-z]{4,}\.css\?aHR0c[\/0-9a-zA-Z]{33,}=?=?$/$css,3p,match-case + let engine = 
Engine::from_rules_debug([r#"/^https?:\/\/[a-z]{8,15}\.top\/[-a-z]{4,}\.css\?aHR0c[\/0-9a-zA-Z]{33,}=?=?$/$css,3p,match-case"#], Default::default()); + let request = Request::new("https://examples.top/abcd.css?aHR0c/aaaaaaaaaaAAAAAAAAAA000000000012==", + "https://examples.com/abcd.css?aHR0c/aaaaaaaaaaAAAAAAAAAA000000000012==", + "stylesheet").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + // fails - inferring unescaped `$` inside regex pattern + /*{ + // /^https?:\/\/[a-z]{8,15}\.top\/[a-z]{4,}\.png\?aHR0c[\/0-9a-zA-Z]{33,}=?=?$/$image,3p,match-case + let engine = Engine::from_rules_debug([r#"/^https?:\/\/[a-z]{8,15}\.top\/[a-z]{4,}\.png\?aHR0c[\/0-9a-zA-Z]{33,}=?=?$/$image,3p,match-case"#], Default::default()); + let request = Request::new("https://examples.top/abcd.png?aHR0c/aaaaaaaaaaAAAAAAAAAA000000000012==", + "https://examples.com/abcd.png?aHR0c/aaaaaaaaaaAAAAAAAAAA000000000012==", + "image").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 + /*{ + // /^https?:\/\/[a-z]{8,15}\.xyz(\/(?:\d{1,5}|0NaN|articles?|browse|index|movie|news|pages?|static|view|web|wiki)){1,4}(?:\.html|\/)$/$frame,3p,match-case + let engine = Engine::from_rules_debug([r#"/^https?:\/\/[a-z]{8,15}\.xyz(\/(?:\d{1,5}|0NaN|articles?|browse|index|movie|news|pages?|static|view|web|wiki)){1,4}(?:\.html|\/)$/$frame,3p,match-case"#], Default::default()); + let request = Request::new("https://examples.xyz/articles.html", + "https://examples.xyz/articles.html", + "frame").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + { + // /^https?:\/\/cdn\.[a-z]{4,6}\.xyz\/app\.js$/$script,3p,match-case + let engine = Engine::from_rules_debug([r#"/^https?:\/\/cdn\.[a-z]{4,6}\.xyz\/app\.js$/$script,3p,match-case"#], Default::default()); + let request = 
Request::new("https://cdn.abcde.xyz/app.js", + "https://cdn.abcde.com/app.js", + "script").unwrap(); + assert!(engine.check_network_request(&request).matched); + } + // fails - because of non-supported look around operator in rust regex https://github.com/rust-lang/regex/issues/127#issuecomment-154713666 + /*{ + // /^https:\/\/a\.[-0-9a-z]{4,16}\.(?:club|com?|cyou|info|net|ru|site|top?|xxx|xyz)\/(?=[a-z]{0,6}[0-9A-Z])[0-9a-zA-Z]{7}\.js$/$script,match-case + let engine = Engine::from_rules_debug([r#"/^https:\/\/a\.[-0-9a-z]{4,16}\.(?:club|com?|cyou|info|net|ru|site|top?|xxx|xyz)\/(?=[a-z]{0,6}[0-9A-Z])[0-9a-zA-Z]{7}\.js$/$script,match-case"#], Default::default()); + let request = Request::new("https://a.abcd.club/aaaaaaA.js", + "https://a.abcd.club/aaaaaaA.js", + "script").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + { + // /^https:\/\/cdn\.jsdelivr\.net\/npm\/[-a-z_]{4,22}@latest\/dist\/script\.min\.js$/$script,3p,match-case + let engine = Engine::from_rules_debug([r#"/^https:\/\/cdn\.jsdelivr\.net\/npm\/[-a-z_]{4,22}@latest\/dist\/script\.min\.js$/$script,3p,match-case"#], Default::default()); + let request = Request::new("https://cdn.jsdelivr.net/npm/abcd@latest/dist/script.min.js", + "https://cdn.jsdelivr.com/npm/abcd@latest/dist/script.min.js", + "script").unwrap(); + assert!(engine.check_network_request(&request).matched); + } + // fails - inferring unescaped `$` inside regex pattern + /*{ + // /^https?:\/\/[-.0-9a-z]+\/script\.js$/$script,1p,strict3p,match-case + let engine = Engine::from_rules_debug([r#"/^https?:\/\/[-.0-9a-z]+\/script\.js$/$script,1p,strict3p,match-case"#], Default::default()); + let request = Request::new("https://www.example.com/script.js", + "https://www.abc.com/script.js", + "script").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + { + let engine = Engine::from_rules_debug([r#"/tesT߶/$domain=example.com"#], Default::default()); + let request = 
Request::new("https://example.com/tesT߶", + "https://example.com", + "script").unwrap(); + assert!(engine.check_network_request(&request).matched); + } + // fails - punycoded domain + /*{ + let engine = Engine::from_rules_debug([r#"/tesT߶/$domain=example.com"#], Default::default()); + let request = Request::new("https://example-tesT߶.com/tesT", + "https://example.com", + "script").unwrap(); + assert!(engine.check_network_request(&request).matched); + }*/ + } + + #[test] + fn scriptlet_permissions() { + use crate::resources::{PermissionMask, ResourceType}; + const UBO_PERM: PermissionMask = PermissionMask::from_bits(0b00000001); + const BRAVE_PERM: PermissionMask = PermissionMask::from_bits(0b00000011); + + let resources = [ + Resource::simple("refresh-defuser.js", MimeType::ApplicationJavascript, "refresh-defuser"), + Resource { + name: "trusted-set-cookie.js".to_string(), + aliases: vec![], + kind: ResourceType::Mime(MimeType::ApplicationJavascript), + content: base64::encode("trusted-set-cookie"), + dependencies: vec![], + permission: UBO_PERM, + }, + Resource { + name: "brave-fix.js".to_string(), + aliases: vec![], + kind: ResourceType::Mime(MimeType::ApplicationJavascript), + content: base64::encode("brave-fix"), + dependencies: vec![], + permission: BRAVE_PERM, + }, + ]; + + let mut filter_set = FilterSet::new(false); + filter_set.add_filters([ + "sub1.example.com##+js(refresh-defuser)", + "sub2.example.com##+js(trusted-set-cookie)", + "sub3.example.com##+js(brave-fix)" + ], Default::default()); + filter_set.add_filters([ + "sub4.example.com##+js(refresh-defuser)", + "sub5.example.com##+js(trusted-set-cookie)", + "sub6.example.com##+js(brave-fix)" + ], ParseOptions { + permissions: UBO_PERM, + ..Default::default() + }); + filter_set.add_filters([ + "sub7.example.com##+js(refresh-defuser)", + "sub8.example.com##+js(trusted-set-cookie)", + "sub9.example.com##+js(brave-fix)" + ], ParseOptions { + permissions: BRAVE_PERM, + ..Default::default() + }); + + let mut 
engine = Engine::from_filter_set(filter_set, true); + engine.use_resources(resources); + + fn wrap_try(scriptlet_content: &str) -> String { + format!("try {{\n{}\n}} catch ( e ) {{ }}\n", scriptlet_content) + } + + assert_eq!(engine.url_cosmetic_resources("https://sub1.example.com").injected_script, wrap_try("refresh-defuser")); + assert_eq!(engine.url_cosmetic_resources("https://sub2.example.com").injected_script, ""); + assert_eq!(engine.url_cosmetic_resources("https://sub3.example.com").injected_script, ""); + + assert_eq!(engine.url_cosmetic_resources("https://sub4.example.com").injected_script, wrap_try("refresh-defuser")); + assert_eq!(engine.url_cosmetic_resources("https://sub5.example.com").injected_script, wrap_try("trusted-set-cookie")); + assert_eq!(engine.url_cosmetic_resources("https://sub6.example.com").injected_script, ""); + + assert_eq!(engine.url_cosmetic_resources("https://sub7.example.com").injected_script, wrap_try("refresh-defuser")); + assert_eq!(engine.url_cosmetic_resources("https://sub8.example.com").injected_script, wrap_try("trusted-set-cookie")); + assert_eq!(engine.url_cosmetic_resources("https://sub9.example.com").injected_script, wrap_try("brave-fix")); + } +} diff --git a/third_party/rust/adblock/v0_7/crate/src/filters/cosmetic.rs b/third_party/rust/adblock/v0_8/crate/src/filters/cosmetic.rs similarity index 89% rename from third_party/rust/adblock/v0_7/crate/src/filters/cosmetic.rs rename to third_party/rust/adblock/v0_8/crate/src/filters/cosmetic.rs index 36c19745c385..fbfdccead1cd 100644 --- a/third_party/rust/adblock/v0_7/crate/src/filters/cosmetic.rs +++ b/third_party/rust/adblock/v0_8/crate/src/filters/cosmetic.rs @@ -1,5 +1,5 @@ -//! Tools for blocking at a page-content level, including CSS selector-based filtering and content -//! script injection. +//! Filters that take effect at a page-content level, including CSS selector-based filtering and +//! content script injection. 
use memchr::{memchr as find_char, memmem, memrchr as find_char_reverse}; use once_cell::sync::Lazy; @@ -7,6 +7,7 @@ use regex::Regex; use serde::{Deserialize, Serialize}; use thiserror::Error; +use crate::resources::PermissionMask; use crate::utils::Hash; use css_validation::{is_valid_css_style, validate_css_selector}; @@ -15,8 +16,6 @@ use css_validation::{is_valid_css_style, validate_css_selector}; pub enum CosmeticFilterError { #[error("punycode error")] PunycodeError, - #[error("invalid style specifier")] - InvalidStyleSpecifier, // TODO replace with `InvalidActionSpecifier` #[error("invalid action specifier")] InvalidActionSpecifier, #[error("unsupported syntax")] @@ -31,8 +30,6 @@ pub enum CosmeticFilterError { GenericUnhide, #[error("generic script inject")] GenericScriptInject, - #[error("generic style")] - GenericStyle, // TODO replace with `GenericAction` #[error("generic action")] GenericAction, #[error("double negation")] @@ -44,7 +41,8 @@ pub enum CosmeticFilterError { } /// Refer to -enum CosmeticFilterAction { +#[derive(PartialEq, Debug, Clone, Serialize, Deserialize)] +pub enum CosmeticFilterAction { Remove, /// Argument is one or more CSS property declarations, separated by the standard ;. Some /// characters, strings, and values are forbidden. 
@@ -55,7 +53,7 @@ enum CosmeticFilterAction { impl CosmeticFilterAction { fn new_style(style: &str) -> Result { - if !is_valid_css_style(&style) { + if !is_valid_css_style(style) { return Err(CosmeticFilterError::InvalidCssStyle); } Ok(Self::Style(style.to_string())) @@ -73,7 +71,7 @@ impl CosmeticFilterAction { /// Regex and quoted args aren't supported yet fn forbid_regex_or_quoted_args(arg: &str) -> Result<(), CosmeticFilterError> { - if arg.starts_with("/") || arg.starts_with("\"") || arg.starts_with("\'") { + if arg.starts_with('/') || arg.starts_with('\"') || arg.starts_with('\'') { return Err(CosmeticFilterError::UnsupportedSyntax); } Ok(()) @@ -107,7 +105,8 @@ pub struct CosmeticFilter { pub raw_line: Option>, pub selector: String, pub key: Option, - pub style: Option, + pub action: Option, + pub permission: PermissionMask, } pub enum CosmeticFilterLocationType { @@ -242,17 +241,17 @@ impl CosmeticFilter { const REMOVE_TOKEN: &str = ":remove()"; - const PAIRS: &[(&[u8], fn(&str) -> Result, fn() -> CosmeticFilterError)] = &[ - (STYLE_TOKEN, CosmeticFilterAction::new_style, || CosmeticFilterError::InvalidStyleSpecifier), - (REMOVE_ATTR_TOKEN, CosmeticFilterAction::new_remove_attr, || CosmeticFilterError::InvalidActionSpecifier), - (REMOVE_CLASS_TOKEN, CosmeticFilterAction::new_remove_class, || CosmeticFilterError::InvalidActionSpecifier), + const PAIRS: &[(&[u8], fn(&str) -> Result)] = &[ + (STYLE_TOKEN, CosmeticFilterAction::new_style), + (REMOVE_ATTR_TOKEN, CosmeticFilterAction::new_remove_attr), + (REMOVE_CLASS_TOKEN, CosmeticFilterAction::new_remove_class), ]; let action; let selector; 'init: { - for (token, constructor, error) in PAIRS { + for (token, constructor) in PAIRS { if let Some(i) = memmem::find(after_sharp.as_bytes(), token) { if after_sharp.ends_with(')') { // indexing safe because of find and ends_with @@ -262,13 +261,13 @@ impl CosmeticFilter { selector = &after_sharp[..i]; break 'init; } else { - return Err(error()); + return 
Err(CosmeticFilterError::InvalidActionSpecifier); } } } - if after_sharp.ends_with(REMOVE_TOKEN) { + if let Some(before_suffix) = after_sharp.strip_suffix(REMOVE_TOKEN) { action = Some(CosmeticFilterAction::Remove); - selector = &after_sharp[..after_sharp.len() - REMOVE_TOKEN.len()]; + selector = before_suffix; break 'init; } else { action = None; @@ -280,8 +279,9 @@ impl CosmeticFilter { } /// Parse the rule in `line` into a `CosmeticFilter`. If `debug` is true, the original rule - /// will be reported in the resulting `CosmeticFilter` struct as well. - pub fn parse(line: &str, debug: bool) -> Result { + /// will be reported in the resulting `CosmeticFilter` struct as well. Use `permission` to + /// manage the filter's access to scriptlet resources for `+js(...)` injections. + pub fn parse(line: &str, debug: bool, permission: PermissionMask) -> Result { let mut mask = CosmeticFilterMask::NONE; if let Some(sharp_index) = find_char(b'#', line.as_bytes()) { let after_sharp_index = sharp_index + 1; @@ -361,22 +361,12 @@ impl CosmeticFilter { Some(s) => s, None => return Err(CosmeticFilterError::InvalidCssSelector), }; - if sharp_index == 0 { - match action { - Some(CosmeticFilterAction::Style(_)) => return Err(CosmeticFilterError::GenericStyle), - Some(_) => return Err(CosmeticFilterError::GenericAction), - None => (), - } + if sharp_index == 0 && action.is_some() { + return Err(CosmeticFilterError::GenericAction); } (validated_selector, action) }; - let style = match action { - Some(CosmeticFilterAction::Style(s)) => Some(s), - Some(_) => return Err(CosmeticFilterError::UnsupportedSyntax), - _ => None, - }; - if (not_entities.is_some() || not_hostnames.is_some()) && mask.contains(CosmeticFilterMask::UNHIDE) { @@ -422,7 +412,8 @@ impl CosmeticFilter { }, selector, key, - style, + action, + permission, }) } else { Err(CosmeticFilterError::MissingSharp) @@ -447,12 +438,12 @@ impl CosmeticFilter { /// To account for this inconsistency, this method will generate and 
return the corresponding /// 'hidden' generic rule if one applies. /// - /// Note that this behavior is not applied to script injections or custom style rules. + /// Note that this behavior is not applied to script injections or rules with actions. pub fn hidden_generic_rule(&self) -> Option { if self.hostnames.is_some() || self.entities.is_some() { None } else if (self.not_hostnames.is_some() || self.not_entities.is_some()) - && (self.style.is_none() && !self.mask.contains(CosmeticFilterMask::SCRIPT_INJECT)) + && (self.action.is_none() && !self.mask.contains(CosmeticFilterMask::SCRIPT_INJECT)) { let mut generic_rule = self.clone(); generic_rule.not_hostnames = None; @@ -1023,7 +1014,7 @@ mod parse_tests { not_hostnames: Option>, selector: String, key: Option, - style: Option, + action: Option, unhide: bool, script_inject: bool, @@ -1041,7 +1032,7 @@ mod parse_tests { not_hostnames: filter.not_hostnames.as_ref().cloned(), selector: filter.selector.clone(), key: filter.key.as_ref().cloned(), - style: filter.style.as_ref().cloned(), + action: filter.action.as_ref().cloned(), unhide: filter.mask.contains(CosmeticFilterMask::UNHIDE), script_inject: filter.mask.contains(CosmeticFilterMask::SCRIPT_INJECT), @@ -1067,7 +1058,7 @@ mod parse_tests { not_hostnames: None, selector: "".to_string(), key: None, - style: None, + action: None, unhide: false, script_inject: false, @@ -1078,10 +1069,14 @@ mod parse_tests { } } + fn parse_cf(rule: &str) -> Result { + CosmeticFilter::parse(rule, false, Default::default()) + } + /// Asserts that `rule` parses into a `CosmeticFilter` equivalent to the summary provided by /// `expected`. 
fn check_parse_result(rule: &str, expected: CosmeticFilterBreakdown) { - let filter: CosmeticFilterBreakdown = CosmeticFilter::parse(rule, false).unwrap().into(); + let filter: CosmeticFilterBreakdown = parse_cf(rule).unwrap().into(); assert_eq!(expected, filter); } @@ -1523,7 +1518,7 @@ mod parse_tests { CosmeticFilterBreakdown { selector: r#".date:not(dt)"#.to_string(), entities: sort_hash_domains(vec!["downloadsource"]), - style: Some("display: block !important;".into()), + action: Some(CosmeticFilterAction::Style("display: block !important;".into())), is_class_selector: true, key: Some("date".to_string()), ..Default::default() @@ -1538,7 +1533,7 @@ mod parse_tests { CosmeticFilterBreakdown { selector: r#".video-wrapper > video[style]"#.to_string(), hostnames: sort_hash_domains(vec!["chip.de"]), - style: Some("display:block!important;padding-top:0!important;".into()), + action: Some(CosmeticFilterAction::Style("display:block!important;padding-top:0!important;".into())), is_class_selector: true, key: Some("video-wrapper".to_string()), ..Default::default() @@ -1549,7 +1544,7 @@ mod parse_tests { CosmeticFilterBreakdown { selector: r#".advertising.medium-rectangle"#.to_string(), hostnames: sort_hash_domains(vec!["allmusic.com"]), - style: Some("min-height: 1px !important;".into()), + action: Some(CosmeticFilterAction::Style("min-height: 1px !important;".into())), is_class_selector: true, key: Some("advertising".to_string()), ..Default::default() @@ -1561,7 +1556,7 @@ mod parse_tests { CosmeticFilterBreakdown { selector: r#".signup_wall_prevent_scroll .SiteHeader, .signup_wall_prevent_scroll .LoggedOutFooter, .signup_wall_prevent_scroll .ContentWrapper"#.to_string(), hostnames: sort_hash_domains(vec!["quora.com"]), - style: Some("filter: none !important;".into()), + action: Some(CosmeticFilterAction::Style("filter: none !important;".into())), is_class_selector: true, key: Some("signup_wall_prevent_scroll".to_string()), ..Default::default() @@ -1572,7 +1567,7 @@ mod 
parse_tests { CosmeticFilterBreakdown { selector: r#"body#styleguide-v2"#.to_string(), hostnames: sort_hash_domains(vec!["imdb.com"]), - style: Some("background-color: #e3e2dd !important; background-image: none !important;".into()), + action: Some(CosmeticFilterAction::Style("background-color: #e3e2dd !important; background-image: none !important;".into())), ..Default::default() }, ); @@ -1581,7 +1576,7 @@ mod parse_tests { CosmeticFilterBreakdown { selector: r#"#login > div[style^="width"]"#.to_string(), hostnames: sort_hash_domains(vec!["streamcloud.eu"]), - style: Some("display: block !important".into()), + action: Some(CosmeticFilterAction::Style("display: block !important".into())), is_id_selector: true, key: Some("login".to_string()), ..Default::default() @@ -1596,7 +1591,7 @@ mod parse_tests { "moondoge.co.in", "moonliteco.in", ]), - style: Some("visibility: collapse !important".into()), + action: Some(CosmeticFilterAction::Style("visibility: collapse !important".into())), ..Default::default() }, ); @@ -1631,86 +1626,86 @@ mod parse_tests { #[test] #[cfg(feature = "css-validation")] fn unsupported() { - assert!(CosmeticFilter::parse("yandex.*##.serp-item:if(:scope > div.organic div.organic__subtitle:matches-css-after(content: /[Рр]еклама/))", false).is_err()); - assert!(CosmeticFilter::parse(r#"facebook.com,facebookcorewwwi.onion##.ego_column:if(a[href^="/campaign/landing"])"#, false).is_err()); - assert!(CosmeticFilter::parse(r#"readcomiconline.to##^script:has-text(this[atob)"#, false).is_err()); - assert!(CosmeticFilter::parse("twitter.com##article:has-text(/Promoted|Gesponsert|Реклама|Promocionado/):xpath(../..)", false).is_err()); - assert!(CosmeticFilter::parse("##", false).is_err()); - assert!(CosmeticFilter::parse("", false).is_err()); + assert!(parse_cf("yandex.*##.serp-item:if(:scope > div.organic div.organic__subtitle:matches-css-after(content: /[Рр]еклама/))").is_err()); + 
assert!(parse_cf(r#"facebook.com,facebookcorewwwi.onion##.ego_column:if(a[href^="/campaign/landing"])"#).is_err()); + assert!(parse_cf(r#"readcomiconline.to##^script:has-text(this[atob)"#).is_err()); + assert!(parse_cf("twitter.com##article:has-text(/Promoted|Gesponsert|Реклама|Promocionado/):xpath(../..)").is_err()); + assert!(parse_cf("##").is_err()); + assert!(parse_cf("").is_err()); // `:has` was previously limited to procedural filtering, but is now a native CSS feature. - assert!(CosmeticFilter::parse(r#"thedailywtf.com##.article-body > div:has(a[href*="utm_medium"])"#, false).is_ok()); + assert!(parse_cf(r#"thedailywtf.com##.article-body > div:has(a[href*="utm_medium"])"#).is_ok()); } #[test] fn hidden_generic() { - let rule = CosmeticFilter::parse("##.selector", false).unwrap(); + let rule = parse_cf("##.selector").unwrap(); assert!(rule.hidden_generic_rule().is_none()); - let rule = CosmeticFilter::parse("test.com##.selector", false).unwrap(); + let rule = parse_cf("test.com##.selector").unwrap(); assert!(rule.hidden_generic_rule().is_none()); - let rule = CosmeticFilter::parse("test.*##.selector", false).unwrap(); + let rule = parse_cf("test.*##.selector").unwrap(); assert!(rule.hidden_generic_rule().is_none()); - let rule = CosmeticFilter::parse("test.com,~a.test.com##.selector", false).unwrap(); + let rule = parse_cf("test.com,~a.test.com##.selector").unwrap(); assert!(rule.hidden_generic_rule().is_none()); - let rule = CosmeticFilter::parse("test.*,~a.test.com##.selector", false).unwrap(); + let rule = parse_cf("test.*,~a.test.com##.selector").unwrap(); assert!(rule.hidden_generic_rule().is_none()); - let rule = CosmeticFilter::parse("test.*,~a.test.*##.selector", false).unwrap(); + let rule = parse_cf("test.*,~a.test.*##.selector").unwrap(); assert!(rule.hidden_generic_rule().is_none()); - let rule = CosmeticFilter::parse("test.com#@#.selector", false).unwrap(); + let rule = parse_cf("test.com#@#.selector").unwrap(); 
assert!(rule.hidden_generic_rule().is_none()); - let rule = CosmeticFilter::parse("~test.com##.selector", false).unwrap(); + let rule = parse_cf("~test.com##.selector").unwrap(); assert_eq!( CosmeticFilterBreakdown::from(rule.hidden_generic_rule().unwrap()), - CosmeticFilter::parse("##.selector", false).unwrap().into(), + parse_cf("##.selector").unwrap().into(), ); - let rule = CosmeticFilter::parse("~test.*##.selector", false).unwrap(); + let rule = parse_cf("~test.*##.selector").unwrap(); assert_eq!( CosmeticFilterBreakdown::from(rule.hidden_generic_rule().unwrap()), - CosmeticFilter::parse("##.selector", false).unwrap().into(), + parse_cf("##.selector").unwrap().into(), ); - let rule = CosmeticFilter::parse("~test.*,~a.test.*##.selector", false).unwrap(); + let rule = parse_cf("~test.*,~a.test.*##.selector").unwrap(); assert_eq!( CosmeticFilterBreakdown::from(rule.hidden_generic_rule().unwrap()), - CosmeticFilter::parse("##.selector", false).unwrap().into(), + parse_cf("##.selector").unwrap().into(), ); - let rule = CosmeticFilter::parse("test.com##.selector:style(border-radius: 13px)", false).unwrap(); + let rule = parse_cf("test.com##.selector:style(border-radius: 13px)").unwrap(); assert!(rule.hidden_generic_rule().is_none()); - let rule = CosmeticFilter::parse("test.*##.selector:style(border-radius: 13px)", false).unwrap(); + let rule = parse_cf("test.*##.selector:style(border-radius: 13px)").unwrap(); assert!(rule.hidden_generic_rule().is_none()); - let rule = CosmeticFilter::parse("~test.com##.selector:style(border-radius: 13px)", false).unwrap(); + let rule = parse_cf("~test.com##.selector:style(border-radius: 13px)").unwrap(); assert!(rule.hidden_generic_rule().is_none()); - let rule = CosmeticFilter::parse("~test.*##.selector:style(border-radius: 13px)", false).unwrap(); + let rule = parse_cf("~test.*##.selector:style(border-radius: 13px)").unwrap(); assert!(rule.hidden_generic_rule().is_none()); - let rule = 
CosmeticFilter::parse("test.com#@#.selector:style(border-radius: 13px)", false).unwrap(); + let rule = parse_cf("test.com#@#.selector:style(border-radius: 13px)").unwrap(); assert!(rule.hidden_generic_rule().is_none()); - let rule = CosmeticFilter::parse("test.com##+js(nowebrtc.js)", false).unwrap(); + let rule = parse_cf("test.com##+js(nowebrtc.js)").unwrap(); assert!(rule.hidden_generic_rule().is_none()); - let rule = CosmeticFilter::parse("test.*##+js(nowebrtc.js)", false).unwrap(); + let rule = parse_cf("test.*##+js(nowebrtc.js)").unwrap(); assert!(rule.hidden_generic_rule().is_none()); - let rule = CosmeticFilter::parse("~test.com##+js(nowebrtc.js)", false).unwrap(); + let rule = parse_cf("~test.com##+js(nowebrtc.js)").unwrap(); assert!(rule.hidden_generic_rule().is_none()); - let rule = CosmeticFilter::parse("~test.*##+js(nowebrtc.js)", false).unwrap(); + let rule = parse_cf("~test.*##+js(nowebrtc.js)").unwrap(); assert!(rule.hidden_generic_rule().is_none()); - let rule = CosmeticFilter::parse("test.com#@#+js(nowebrtc.js)", false).unwrap(); + let rule = parse_cf("test.com#@#+js(nowebrtc.js)").unwrap(); assert!(rule.hidden_generic_rule().is_none()); } } @@ -1824,22 +1819,26 @@ mod matching_tests { } } + fn parse_cf(rule: &str) -> Result { + CosmeticFilter::parse(rule, false, Default::default()) + } + #[test] fn generic_filter() { - let rule = CosmeticFilter::parse("##.selector", false).unwrap(); + let rule = parse_cf("##.selector").unwrap(); assert!(rule.matches_str("foo.com", "foo.com")); } #[test] fn single_domain() { - let rule = CosmeticFilter::parse("foo.com##.selector", false).unwrap(); + let rule = parse_cf("foo.com##.selector").unwrap(); assert!(rule.matches_str("foo.com", "foo.com")); assert!(!rule.matches_str("bar.com", "bar.com")); } #[test] fn multiple_domains() { - let rule = CosmeticFilter::parse("foo.com,test.com##.selector", false).unwrap(); + let rule = parse_cf("foo.com,test.com##.selector").unwrap(); assert!(rule.matches_str("foo.com", 
"foo.com")); assert!(rule.matches_str("test.com", "test.com")); assert!(!rule.matches_str("bar.com", "bar.com")); @@ -1847,11 +1846,11 @@ mod matching_tests { #[test] fn subdomain() { - let rule = CosmeticFilter::parse("foo.com,test.com##.selector", false).unwrap(); + let rule = parse_cf("foo.com,test.com##.selector").unwrap(); assert!(rule.matches_str("sub.foo.com", "foo.com")); assert!(rule.matches_str("sub.test.com", "test.com")); - let rule = CosmeticFilter::parse("foo.com,sub.test.com##.selector", false).unwrap(); + let rule = parse_cf("foo.com,sub.test.com##.selector").unwrap(); assert!(rule.matches_str("sub.test.com", "test.com")); assert!(!rule.matches_str("test.com", "test.com")); assert!(!rule.matches_str("com", "com")); @@ -1859,14 +1858,14 @@ mod matching_tests { #[test] fn entity() { - let rule = CosmeticFilter::parse("foo.com,sub.test.*##.selector", false).unwrap(); + let rule = parse_cf("foo.com,sub.test.*##.selector").unwrap(); assert!(rule.matches_str("foo.com", "foo.com")); assert!(rule.matches_str("bar.foo.com", "foo.com")); assert!(rule.matches_str("sub.test.com", "test.com")); assert!(rule.matches_str("sub.test.fr", "test.fr")); assert!(!rule.matches_str("sub.test.evil.biz", "evil.biz")); - let rule = CosmeticFilter::parse("foo.*##.selector", false).unwrap(); + let rule = parse_cf("foo.*##.selector").unwrap(); assert!(rule.matches_str("foo.co.uk", "foo.co.uk")); assert!(rule.matches_str("bar.foo.co.uk", "foo.co.uk")); assert!(rule.matches_str("baz.bar.foo.co.uk", "foo.co.uk")); @@ -1875,18 +1874,18 @@ mod matching_tests { #[test] fn nonmatching() { - let rule = CosmeticFilter::parse("foo.*##.selector", false).unwrap(); + let rule = parse_cf("foo.*##.selector").unwrap(); assert!(!rule.matches_str("foo.bar.com", "bar.com")); assert!(!rule.matches_str("bar-foo.com", "bar-foo.com")); } #[test] fn entity_negations() { - let rule = CosmeticFilter::parse("~foo.*##.selector", false).unwrap(); + let rule = parse_cf("~foo.*##.selector").unwrap(); 
assert!(!rule.matches_str("foo.com", "foo.com")); assert!(rule.matches_str("foo.evil.biz", "evil.biz")); - let rule = CosmeticFilter::parse("~foo.*,~bar.*##.selector", false).unwrap(); + let rule = parse_cf("~foo.*,~bar.*##.selector").unwrap(); assert!(rule.matches_str("baz.com", "baz.com")); assert!(!rule.matches_str("foo.com", "foo.com")); assert!(!rule.matches_str("sub.foo.com", "foo.com")); @@ -1896,13 +1895,13 @@ mod matching_tests { #[test] fn hostname_negations() { - let rule = CosmeticFilter::parse("~foo.com##.selector", false).unwrap(); + let rule = parse_cf("~foo.com##.selector").unwrap(); assert!(!rule.matches_str("foo.com", "foo.com")); assert!(!rule.matches_str("bar.foo.com", "foo.com")); assert!(rule.matches_str("foo.com.bar", "com.bar")); assert!(rule.matches_str("foo.co.uk", "foo.co.uk")); - let rule = CosmeticFilter::parse("~foo.com,~foo.de,~bar.com##.selector", false).unwrap(); + let rule = parse_cf("~foo.com,~foo.de,~bar.com##.selector").unwrap(); assert!(!rule.matches_str("foo.com", "foo.com")); assert!(!rule.matches_str("sub.foo.com", "foo.com")); assert!(!rule.matches_str("foo.de", "foo.de")); @@ -1915,7 +1914,7 @@ mod matching_tests { #[test] fn entity_with_suffix_exception() { - let rule = CosmeticFilter::parse("foo.*,~foo.com##.selector", false).unwrap(); + let rule = parse_cf("foo.*,~foo.com##.selector").unwrap(); assert!(!rule.matches_str("foo.com", "foo.com")); assert!(!rule.matches_str("sub.foo.com", "foo.com")); assert!(rule.matches_str("foo.de", "foo.de")); @@ -1924,7 +1923,7 @@ mod matching_tests { #[test] fn entity_with_subdomain_exception() { - let rule = CosmeticFilter::parse("foo.*,~sub.foo.*##.selector", false).unwrap(); + let rule = parse_cf("foo.*,~sub.foo.*##.selector").unwrap(); assert!(rule.matches_str("foo.com", "foo.com")); assert!(rule.matches_str("foo.de", "foo.de")); assert!(!rule.matches_str("sub.foo.com", "foo.com")); @@ -1934,56 +1933,55 @@ mod matching_tests { #[test] fn no_domain_provided() { - let rule = 
CosmeticFilter::parse("foo.*##.selector", false).unwrap(); + let rule = parse_cf("foo.*##.selector").unwrap(); assert!(!rule.matches_str("foo.com", "")); } #[test] fn no_hostname_provided() { - let rule = CosmeticFilter::parse("domain.com##.selector", false).unwrap(); + let rule = parse_cf("domain.com##.selector").unwrap(); assert!(!rule.matches_str("", "")); - let rule = CosmeticFilter::parse("domain.*##.selector", false).unwrap(); + let rule = parse_cf("domain.*##.selector").unwrap(); assert!(!rule.matches_str("", "")); - let rule = CosmeticFilter::parse("~domain.*##.selector", false).unwrap(); + let rule = parse_cf("~domain.*##.selector").unwrap(); assert!(!rule.matches_str("", "")); - let rule = CosmeticFilter::parse("~domain.com##.selector", false).unwrap(); + let rule = parse_cf("~domain.com##.selector").unwrap(); assert!(!rule.matches_str("", "")); } #[test] fn respects_etld() { - let rule = CosmeticFilter::parse("github.io##.selector", false).unwrap(); + let rule = parse_cf("github.io##.selector").unwrap(); assert!(rule.matches_str("test.github.io", "github.io")); } #[test] fn multiple_selectors() { - assert!(CosmeticFilter::parse("youtube.com##.masthead-ad-control,.ad-div,.pyv-afc-ads-container", false).is_ok()); - assert!(CosmeticFilter::parse("m.economictimes.com###appBanner,#stickyBanner", false).is_ok()); - assert!(CosmeticFilter::parse("googledrivelinks.com###wpsafe-generate, #wpsafe-link:style(display: block !important;)", false).is_ok()); + assert!(parse_cf("youtube.com##.masthead-ad-control,.ad-div,.pyv-afc-ads-container").is_ok()); + assert!(parse_cf("m.economictimes.com###appBanner,#stickyBanner").is_ok()); + assert!(parse_cf("googledrivelinks.com###wpsafe-generate, #wpsafe-link:style(display: block !important;)").is_ok()); } #[test] fn actions() { - assert!(CosmeticFilter::parse("example.com###adBanner:style(background: transparent)", false).is_ok()); - // `remove`, `remove-attr`, `remove-class` are unsupported for now - 
assert!(CosmeticFilter::parse("example.com###adBanner:remove()", false).is_err()); - assert!(CosmeticFilter::parse("example.com###adBanner:remove-attr(style)", false).is_err()); - assert!(CosmeticFilter::parse("example.com###adBanner:remove-class(src)", false).is_err()); + assert!(parse_cf("example.com###adBanner:style(background: transparent)").is_ok()); + assert!(parse_cf("example.com###adBanner:remove()").is_ok()); + assert!(parse_cf("example.com###adBanner:remove-attr(style)").is_ok()); + assert!(parse_cf("example.com###adBanner:remove-class(src)").is_ok()); } #[test] #[cfg(feature = "css-validation")] fn abp_has_conversion() { - let rule = CosmeticFilter::parse("imgur.com#?#div.Gallery-Sidebar-PostContainer:-abp-has(div.promoted-hover)", false).unwrap(); + let rule = parse_cf("imgur.com#?#div.Gallery-Sidebar-PostContainer:-abp-has(div.promoted-hover)").unwrap(); assert_eq!(rule.selector, "div.Gallery-Sidebar-PostContainer:has(div.promoted-hover)"); - let rule = CosmeticFilter::parse(r##"webtools.fineaty.com#?#div[class*=" hidden-"]:-abp-has(.adsbygoogle)"##, false).unwrap(); + let rule = parse_cf(r##"webtools.fineaty.com#?#div[class*=" hidden-"]:-abp-has(.adsbygoogle)"##).unwrap(); assert_eq!(rule.selector, r#"div[class*=" hidden-"]:has(.adsbygoogle)"#); - let rule = CosmeticFilter::parse(r##"facebook.com,facebookcorewwwi.onion#?#._6y8t:-abp-has(a[href="/ads/about/?entry_product=ad_preferences"])"##, false).unwrap(); + let rule = parse_cf(r##"facebook.com,facebookcorewwwi.onion#?#._6y8t:-abp-has(a[href="/ads/about/?entry_product=ad_preferences"])"##).unwrap(); assert_eq!(rule.selector, r#"._6y8t:has(a[href="/ads/about/?entry_product=ad_preferences"])"#); - let rule = CosmeticFilter::parse(r##"mtgarena.pro#?##root > div > div:-abp-has(> .vm-placement)"##, false).unwrap(); + let rule = parse_cf(r##"mtgarena.pro#?##root > div > div:-abp-has(> .vm-placement)"##).unwrap(); assert_eq!(rule.selector, r#"#root > div > div:has(> .vm-placement)"#); // Error without 
`#?#`: - assert!(CosmeticFilter::parse(r##"mtgarena.pro###root > div > div:-abp-has(> .vm-placement)"##, false).is_err()); + assert!(parse_cf(r##"mtgarena.pro###root > div > div:-abp-has(> .vm-placement)"##).is_err()); } } diff --git a/third_party/rust/adblock/v0_7/crate/src/filters/mod.rs b/third_party/rust/adblock/v0_8/crate/src/filters/mod.rs similarity index 100% rename from third_party/rust/adblock/v0_7/crate/src/filters/mod.rs rename to third_party/rust/adblock/v0_8/crate/src/filters/mod.rs diff --git a/third_party/rust/adblock/v0_7/crate/src/filters/network.rs b/third_party/rust/adblock/v0_8/crate/src/filters/network.rs similarity index 95% rename from third_party/rust/adblock/v0_7/crate/src/filters/network.rs rename to third_party/rust/adblock/v0_8/crate/src/filters/network.rs index 095e69f516cc..edea46bd564c 100644 --- a/third_party/rust/adblock/v0_7/crate/src/filters/network.rs +++ b/third_party/rust/adblock/v0_8/crate/src/filters/network.rs @@ -1,3 +1,6 @@ +//! Filters that take effect at the network request level, including blocking and response +//! modification. 
+ use memchr::{memchr as find_char, memmem, memrchr as find_char_reverse}; use once_cell::sync::Lazy; use regex::{Regex, RegexSet}; @@ -9,8 +12,7 @@ use std::fmt; use crate::lists::ParseOptions; use crate::regex_manager::RegexManager; use crate::request; -use crate::utils; -use crate::utils::Hash; +use crate::utils::{self, Hash}; pub const TOKENS_BUFFER_SIZE: usize = 200; @@ -999,11 +1001,6 @@ impl NetworkFilter { } } - - fn get_cpt_mask(&self) -> NetworkFilterMask { - self.mask & NetworkFilterMask::FROM_ALL_TYPES - } - pub fn is_exception(&self) -> bool { self.mask.contains(NetworkFilterMask::IS_EXCEPTION) } @@ -1357,7 +1354,7 @@ fn check_pattern_left_right_anchor_filter( let request_url = request.get_url(filter.match_case()); match &filter.filter { FilterPart::Empty => true, - FilterPart::Simple(f) => return &request_url == f, + FilterPart::Simple(f) => &request_url == f, FilterPart::AnyOf(filters) => { for f in filters { if &request_url == f { @@ -1603,8 +1600,8 @@ fn check_options(filter: &NetworkFilter, request: &request::Request) -> bool { if !check_cpt_allowed(filter, &request.request_type) || (request.is_https && !filter.for_https()) || (request.is_http && !filter.for_http()) - || (!filter.first_party() && request.is_first_party == Some(true)) - || (!filter.third_party() && request.is_third_party == Some(true)) + || (!filter.first_party() && !request.is_third_party) + || (!filter.third_party() && request.is_third_party) { return false; } @@ -2522,7 +2519,7 @@ mod parse_tests { { let filter = NetworkFilter::parse_hosts_style("example.com", true).unwrap(); assert!(filter.raw_line.is_some()); - assert_eq!(*filter.raw_line.clone().unwrap(), "||example.com^".to_string()); + assert_eq!(*filter.raw_line.clone().unwrap(), "||example.com^"); let mut defaults = default_network_filter_breakdown(); defaults.hostname = Some("example.com".to_string()); defaults.is_plain = true; @@ -2534,7 +2531,7 @@ mod parse_tests { { let filter = 
NetworkFilter::parse_hosts_style("www.example.com", true).unwrap(); assert!(filter.raw_line.is_some()); - assert_eq!(*filter.raw_line.clone().unwrap(), "||example.com^".to_string()); + assert_eq!(*filter.raw_line.clone().unwrap(), "||example.com^"); let mut defaults = default_network_filter_breakdown(); defaults.hostname = Some("example.com".to_string()); defaults.is_plain = true; @@ -2546,7 +2543,7 @@ mod parse_tests { { let filter = NetworkFilter::parse_hosts_style("malware.example.com", true).unwrap(); assert!(filter.raw_line.is_some()); - assert_eq!(*filter.raw_line.clone().unwrap(), "||malware.example.com^".to_string()); + assert_eq!(*filter.raw_line.clone().unwrap(), "||malware.example.com^"); let mut defaults = default_network_filter_breakdown(); defaults.hostname = Some("malware.example.com".to_string()); defaults.is_plain = true; @@ -2854,7 +2851,7 @@ mod match_tests { fn filter_match_url(filter: &str, url: &str, matching: bool) { let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); - let request = request::Request::from_url(url).unwrap(); + let request = request::Request::new(url, "https://example.com", "other").unwrap(); assert!( network_filter.matches_test(&request) == matching, @@ -2868,7 +2865,7 @@ mod match_tests { fn hosts_filter_match_url(filter: &str, url: &str, matching: bool) { let network_filter = NetworkFilter::parse_hosts_style(filter, true).unwrap(); - let request = request::Request::from_url(url).unwrap(); + let request = request::Request::new(url, "https://example.com", "other").unwrap(); assert!( network_filter.matches_test(&request) == matching, @@ -3036,7 +3033,7 @@ mod match_tests { let filter = "@@||fastly.net/ad2/$image,script,xmlhttprequest"; let url = "https://0914.global.ssl.fastly.net/ad2/script/x.js?cb=1549980040838"; let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); - let request = request::Request::from_urls( + let request = request::Request::new( url, 
"https://www.gamespot.com/metro-exodus/", "script", @@ -3053,7 +3050,7 @@ mod match_tests { let filter = "@@||swatchseries.to/public/js/edit-show.js$script,domain=swatchseries.to"; let url = "https://www1.swatchseries.to/public/js/edit-show.js"; let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); - let request = request::Request::from_urls( + let request = request::Request::new( url, "https://www1.swatchseries.to/serie/roswell_new_mexico", "script", @@ -3078,16 +3075,16 @@ mod match_tests { fn check_ws_vs_http_matching() { let network_filter = NetworkFilter::parse("|ws://$domain=4shared.com", true, Default::default()).unwrap(); - assert!(network_filter.matches_test(&request::Request::from_urls("ws://example.com", "https://4shared.com", "websocket").unwrap())); - assert!(network_filter.matches_test(&request::Request::from_urls("wss://example.com", "https://4shared.com", "websocket").unwrap())); - assert!(!network_filter.matches_test(&request::Request::from_urls("http://example.com", "https://4shared.com", "script").unwrap())); - assert!(!network_filter.matches_test(&request::Request::from_urls("https://example.com", "https://4shared.com", "script").unwrap())); + assert!(network_filter.matches_test(&request::Request::new("ws://example.com", "https://4shared.com", "websocket").unwrap())); + assert!(network_filter.matches_test(&request::Request::new("wss://example.com", "https://4shared.com", "websocket").unwrap())); + assert!(!network_filter.matches_test(&request::Request::new("http://example.com", "https://4shared.com", "script").unwrap())); + assert!(!network_filter.matches_test(&request::Request::new("https://example.com", "https://4shared.com", "script").unwrap())); // The `ws://` and `wss://` protocols should be used, rather than the resource type. 
- assert!(network_filter.matches_test(&request::Request::from_urls("ws://example.com", "https://4shared.com", "script").unwrap())); - assert!(network_filter.matches_test(&request::Request::from_urls("wss://example.com", "https://4shared.com", "script").unwrap())); - assert!(!network_filter.matches_test(&request::Request::from_urls("http://example.com", "https://4shared.com", "websocket").unwrap())); - assert!(!network_filter.matches_test(&request::Request::from_urls("https://example.com", "https://4shared.com", "websocket").unwrap())); + assert!(network_filter.matches_test(&request::Request::new("ws://example.com", "https://4shared.com", "script").unwrap())); + assert!(network_filter.matches_test(&request::Request::new("wss://example.com", "https://4shared.com", "script").unwrap())); + assert!(!network_filter.matches_test(&request::Request::new("http://example.com", "https://4shared.com", "websocket").unwrap())); + assert!(!network_filter.matches_test(&request::Request::new("https://example.com", "https://4shared.com", "websocket").unwrap())); } #[test] @@ -3096,17 +3093,17 @@ mod match_tests { // cpt test { let network_filter = NetworkFilter::parse("||foo$image", true, Default::default()).unwrap(); - let request = request::Request::from_urls("https://foo.com/bar", "", "image").unwrap(); + let request = request::Request::new("https://foo.com/bar", "", "image").unwrap(); assert_eq!(check_options(&network_filter, &request), true); } { let network_filter = NetworkFilter::parse("||foo$image", true, Default::default()).unwrap(); - let request = request::Request::from_urls("https://foo.com/bar", "", "script").unwrap(); + let request = request::Request::new("https://foo.com/bar", "", "script").unwrap(); assert_eq!(check_options(&network_filter, &request), false); } { let network_filter = NetworkFilter::parse("||foo$~image", true, Default::default()).unwrap(); - let request = request::Request::from_urls("https://foo.com/bar", "", "script").unwrap(); + let request = 
request::Request::new("https://foo.com/bar", "", "script").unwrap(); assert_eq!(check_options(&network_filter, &request), true); } @@ -3114,14 +3111,14 @@ mod match_tests { { let network_filter = NetworkFilter::parse("||foo$~third-party", true, Default::default()).unwrap(); let request = - request::Request::from_urls("https://foo.com/bar", "http://baz.foo.com", "") + request::Request::new("https://foo.com/bar", "http://baz.foo.com", "") .unwrap(); assert_eq!(check_options(&network_filter, &request), true); } { let network_filter = NetworkFilter::parse("||foo$~third-party", true, Default::default()).unwrap(); let request = - request::Request::from_urls("https://foo.com/bar", "http://baz.bar.com", "") + request::Request::new("https://foo.com/bar", "http://baz.bar.com", "") .unwrap(); assert_eq!(check_options(&network_filter, &request), false); } @@ -3130,14 +3127,14 @@ mod match_tests { { let network_filter = NetworkFilter::parse("||foo$~first-party", true, Default::default()).unwrap(); let request = - request::Request::from_urls("https://foo.com/bar", "http://baz.bar.com", "") + request::Request::new("https://foo.com/bar", "http://baz.bar.com", "") .unwrap(); assert_eq!(check_options(&network_filter, &request), true); } { let network_filter = NetworkFilter::parse("||foo$~first-party", true, Default::default()).unwrap(); let request = - request::Request::from_urls("https://foo.com/bar", "http://baz.foo.com", "") + request::Request::new("https://foo.com/bar", "http://baz.foo.com", "") .unwrap(); assert_eq!(check_options(&network_filter, &request), false); } @@ -3146,13 +3143,13 @@ mod match_tests { { let network_filter = NetworkFilter::parse("||foo$domain=foo.com", true, Default::default()).unwrap(); let request = - request::Request::from_urls("https://foo.com/bar", "http://foo.com", "").unwrap(); + request::Request::new("https://foo.com/bar", "http://foo.com", "").unwrap(); assert_eq!(check_options(&network_filter, &request), true); } { let network_filter = 
NetworkFilter::parse("||foo$domain=foo.com", true, Default::default()).unwrap(); let request = - request::Request::from_urls("https://foo.com/bar", "http://bar.com", "").unwrap(); + request::Request::new("https://foo.com/bar", "http://bar.com", "").unwrap(); assert_eq!(check_options(&network_filter, &request), false); } @@ -3160,13 +3157,13 @@ mod match_tests { { let network_filter = NetworkFilter::parse("||foo$domain=~bar.com", true, Default::default()).unwrap(); let request = - request::Request::from_urls("https://foo.com/bar", "http://foo.com", "").unwrap(); + request::Request::new("https://foo.com/bar", "http://foo.com", "").unwrap(); assert_eq!(check_options(&network_filter, &request), true); } { let network_filter = NetworkFilter::parse("||foo$domain=~bar.com", true, Default::default()).unwrap(); let request = - request::Request::from_urls("https://foo.com/bar", "http://bar.com", "").unwrap(); + request::Request::new("https://foo.com/bar", "http://bar.com", "").unwrap(); assert_eq!(check_options(&network_filter, &request), false); } } @@ -3175,43 +3172,43 @@ mod match_tests { fn check_domain_option_subsetting_works() { { let network_filter = NetworkFilter::parse("adv$domain=example.com|~foo.example.com", true, Default::default()).unwrap(); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://example.com", "").unwrap()) == true); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://foo.example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://subfoo.foo.example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://bar.example.com", "").unwrap()) == true); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://anotherexample.com", "").unwrap()) == false); + 
assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://example.com", "").unwrap()) == true); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://foo.example.com", "").unwrap()) == false); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://subfoo.foo.example.com", "").unwrap()) == false); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://bar.example.com", "").unwrap()) == true); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://anotherexample.com", "").unwrap()) == false); } { let network_filter = NetworkFilter::parse("adv$domain=~example.com|~foo.example.com", true, Default::default()).unwrap(); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://foo.example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://subfoo.foo.example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://bar.example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://anotherexample.com", "").unwrap()) == true); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://example.com", "").unwrap()) == false); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://foo.example.com", "").unwrap()) == false); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://subfoo.foo.example.com", "").unwrap()) == false); + 
assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://bar.example.com", "").unwrap()) == false); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://anotherexample.com", "").unwrap()) == true); } { let network_filter = NetworkFilter::parse("adv$domain=example.com|foo.example.com", true, Default::default()).unwrap(); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://example.com", "").unwrap()) == true); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://foo.example.com", "").unwrap()) == true); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://subfoo.foo.example.com", "").unwrap()) == true); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://bar.example.com", "").unwrap()) == true); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://anotherexample.com", "").unwrap()) == false); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://example.com", "").unwrap()) == true); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://foo.example.com", "").unwrap()) == true); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://subfoo.foo.example.com", "").unwrap()) == true); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://bar.example.com", "").unwrap()) == true); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://anotherexample.com", "").unwrap()) == false); } { let network_filter = NetworkFilter::parse("adv$domain=~example.com|foo.example.com", true, Default::default()).unwrap(); - 
assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://foo.example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://subfoo.foo.example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://bar.example.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://anotherexample.com", "").unwrap()) == false); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://example.com", "").unwrap()) == false); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://foo.example.com", "").unwrap()) == false); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://subfoo.foo.example.com", "").unwrap()) == false); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://bar.example.com", "").unwrap()) == false); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://anotherexample.com", "").unwrap()) == false); } { let network_filter = NetworkFilter::parse("adv$domain=com|~foo.com", true, Default::default()).unwrap(); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://com", "").unwrap()) == true); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://foo.com", "").unwrap()) == false); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://subfoo.foo.com", "").unwrap()) == false); - 
assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://bar.com", "").unwrap()) == true); - assert!(network_filter.matches_test(&request::Request::from_urls("http://example.net/adv", "http://co.uk", "").unwrap()) == false); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://com", "").unwrap()) == true); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://foo.com", "").unwrap()) == false); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://subfoo.foo.com", "").unwrap()) == false); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://bar.com", "").unwrap()) == true); + assert!(network_filter.matches_test(&request::Request::new("http://example.net/adv", "http://co.uk", "").unwrap()) == false); } } @@ -3250,7 +3247,7 @@ mod match_tests { let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); let url = "https://bit.ly/bar/"; let source = "http://123movies.com"; - let request = request::Request::from_urls(url, source, "").unwrap(); + let request = request::Request::new(url, source, "").unwrap(); assert!( network_filter.matches_test(&request) == true, "Expected match for {} on {}", @@ -3264,7 +3261,7 @@ mod match_tests { let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); let url = "https://data.foo.com/9VjjrjU9Or2aqkb8PDiqTBnULPgeI48WmYEHkYer"; let source = "http://123movies.com"; - let request = request::Request::from_urls(url, source, "xmlhttprequest").unwrap(); + let request = request::Request::new(url, source, "xmlhttprequest").unwrap(); assert!( network_filter.matches_test(&request) == true, "Expected match for {} on {}", @@ -3278,7 +3275,7 @@ mod match_tests { let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); let url = "https://hello.club/123.css"; let 
source = "http://123movies.com"; - let request = request::Request::from_urls(url, source, "stylesheet").unwrap(); + let request = request::Request::new(url, source, "stylesheet").unwrap(); assert!( network_filter.matches_test(&request) == true, "Expected match for {} on {}", @@ -3290,14 +3287,14 @@ mod match_tests { #[test] #[ignore] // Not going to handle lookaround regexes - #[cfg(feature = "debug-info")] + #[cfg(feature = "regex-debug-info")] fn check_lookaround_regex_handled() { { let filter = r#"/^https?:\/\/([0-9a-z\-]+\.)?(9anime|animeland|animenova|animeplus|animetoon|animewow|gamestorrent|goodanime|gogoanime|igg-games|kimcartoon|memecenter|readcomiconline|toonget|toonova|watchcartoononline)\.[a-z]{2,4}\/(?!([Ee]xternal|[Ii]mages|[Ss]cripts|[Uu]ploads|ac|ajax|assets|combined|content|cov|cover|(img\/bg)|(img\/icon)|inc|jwplayer|player|playlist-cat-rss|static|thumbs|wp-content|wp-includes)\/)(.*)/$image,other,script,~third-party,xmlhttprequest,domain=~animeland.hu"#; let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); let url = "https://data.foo.com/9VjjrjU9Or2aqkb8PDiqTBnULPgeI48WmYEHkYer"; let source = "http://123movies.com"; - let request = request::Request::from_urls(url, source, "script").unwrap(); + let request = request::Request::new(url, source, "script").unwrap(); let mut regex_manager = RegexManager::default(); assert!(regex_manager.get_compiled_regex_count() == 0); assert!( @@ -3317,7 +3314,7 @@ mod match_tests { let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); let url = "https://example.com/ad.js"; let source = "http://auth.wi-fi.ru"; - let request = request::Request::from_urls(url, source, "script").unwrap(); + let request = request::Request::new(url, source, "script").unwrap(); assert!( network_filter.matches_test(&request) == true, "Expected match for {} on {}", @@ -3330,7 +3327,7 @@ mod match_tests { let network_filter = NetworkFilter::parse(filter, true, 
Default::default()).unwrap(); let url = "https://example.com/ad.js"; let source = "http://auth.wi-fi.ru"; - let request = request::Request::from_urls(url, source, "script").unwrap(); + let request = request::Request::new(url, source, "script").unwrap(); assert!( network_filter.matches_test(&request) == true, "Expected match for {} on {}", @@ -3347,8 +3344,8 @@ mod match_tests { let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); let url = "https://www.google.com/aclk?sa=l&ai=DChcSEwioqMfq5ovjAhVvte0KHXBYDKoYABAJGgJkZw&sig=AOD64_0IL5OYOIkZA7qWOBt0yRmKL4hKJw&ctype=5&q=&ved=0ahUKEwjQ88Hq5ovjAhXYiVwKHWAgB5gQww8IXg&adurl="; let source = "https://www.google.com/aclk?sa=l&ai=DChcSEwioqMfq5ovjAhVvte0KHXBYDKoYABAJGgJkZw&sig=AOD64_0IL5OYOIkZA7qWOBt0yRmKL4hKJw&ctype=5&q=&ved=0ahUKEwjQ88Hq5ovjAhXYiVwKHWAgB5gQww8IXg&adurl="; - let request = request::Request::from_urls(url, source, "document").unwrap(); - assert_eq!(request.is_third_party, Some(false)); + let request = request::Request::new(url, source, "document").unwrap(); + assert!(!request.is_third_party); assert!( network_filter.matches_test(&request) == true, "Expected match for {} on {}", @@ -3361,8 +3358,8 @@ mod match_tests { let network_filter = NetworkFilter::parse(filter, true, Default::default()).unwrap(); let url = "https://www.google.com/aclk?sa=l&ai=DChcSEwioqMfq5ovjAhVvte0KHXBYDKoYABAJGgJkZw&sig=AOD64_0IL5OYOIkZA7qWOBt0yRmKL4hKJw&ctype=5&q=&ved=0ahUKEwjQ88Hq5ovjAhXYiVwKHWAgB5gQww8IXg&adurl="; let source = "https://www.google.com/aclk?sa=l&ai=DChcSEwioqMfq5ovjAhVvte0KHXBYDKoYABAJGgJkZw&sig=AOD64_0IL5OYOIkZA7qWOBt0yRmKL4hKJw&ctype=5&q=&ved=0ahUKEwjQ88Hq5ovjAhXYiVwKHWAgB5gQww8IXg&adurl="; - let request = request::Request::from_urls(url, source, "main_frame").unwrap(); - assert_eq!(request.is_third_party, Some(false)); + let request = request::Request::new(url, source, "main_frame").unwrap(); + assert!(!request.is_third_party); assert!( network_filter.matches_test(&request) == true, 
"Expected match for {} on {}", @@ -3388,17 +3385,17 @@ mod match_tests { mod hash_collision_tests { use super::*; - use crate::utils::Hash; + use crate::test_utils; use crate::lists::parse_filters; use std::collections::HashMap; #[test] fn check_rule_ids_no_collisions() { - let rules = utils::rules_from_lists(&[ - String::from("data/easylist.to/easylist/easylist.txt"), - String::from("data/easylist.to/easylist/easyprivacy.txt"), + let rules = test_utils::rules_from_lists([ + "data/easylist.to/easylist/easylist.txt", + "data/easylist.to/easylist/easyprivacy.txt", ]); - let (network_filters, _) = parse_filters(&rules, true, Default::default()); + let (network_filters, _) = parse_filters(rules, true, Default::default()); let mut filter_ids: HashMap = HashMap::new(); diff --git a/third_party/rust/adblock/v0_7/crate/src/lib.rs b/third_party/rust/adblock/v0_8/crate/src/lib.rs similarity index 93% rename from third_party/rust/adblock/v0_7/crate/src/lib.rs rename to third_party/rust/adblock/v0_8/crate/src/lib.rs index 69fde0994657..956f0827652a 100644 --- a/third_party/rust/adblock/v0_7/crate/src/lib.rs +++ b/third_party/rust/adblock/v0_8/crate/src/lib.rs @@ -15,18 +15,16 @@ //! //! Check the [`Engine`] documentation to get started with adblocking. 
-#![allow(dead_code)] - // Own modules, currently everything is exposed, will need to limit pub mod blocker; #[cfg(feature = "content-blocking")] pub mod content_blocking; pub mod cosmetic_filter_cache; mod data_format; -pub mod engine; +mod engine; pub mod filters; pub mod lists; -pub mod optimizer; +mod optimizer; pub mod regex_manager; pub mod request; pub mod resources; @@ -39,6 +37,10 @@ pub use engine::Engine; #[doc(inline)] pub use lists::FilterSet; +#[cfg(test)] +#[path = "../tests/test_utils.rs"] +mod test_utils; + #[cfg(test)] mod sync_tests { #[allow(unused)] diff --git a/third_party/rust/adblock/v0_7/crate/src/lists.rs b/third_party/rust/adblock/v0_8/crate/src/lists.rs similarity index 94% rename from third_party/rust/adblock/v0_7/crate/src/lists.rs rename to third_party/rust/adblock/v0_8/crate/src/lists.rs index a262225678ed..d5c5a94f0a44 100644 --- a/third_party/rust/adblock/v0_7/crate/src/lists.rs +++ b/third_party/rust/adblock/v0_8/crate/src/lists.rs @@ -4,6 +4,7 @@ use std::convert::TryFrom; use crate::filters::network::{NetworkFilter, NetworkFilterError}; use crate::filters::cosmetic::{CosmeticFilter, CosmeticFilterError}; +use crate::resources::PermissionMask; use itertools::{Either, Itertools}; use memchr::memchr as find_char; @@ -25,11 +26,11 @@ impl Default for RuleTypes { } impl RuleTypes { - fn loads_network_rules(&self) -> bool { + pub fn loads_network_rules(&self) -> bool { matches!(self, Self::All | Self::NetworkOnly) } - fn loads_cosmetic_rules(&self) -> bool { + pub fn loads_cosmetic_rules(&self) -> bool { matches!(self, Self::All | Self::CosmeticOnly) } } @@ -57,6 +58,10 @@ pub struct ParseOptions { /// can be loaded. #[serde(default)] pub rule_types: RuleTypes, + /// Specifies permissions to use when parsing a given filter list. See [`PermissionMask`] for + /// more info. 
+ #[serde(default)] + pub permissions: PermissionMask, } impl Default for ParseOptions { @@ -64,6 +69,7 @@ impl Default for ParseOptions { ParseOptions { format: FilterFormat::Standard, rule_types: RuleTypes::All, + permissions: PermissionMask::default(), } } } @@ -225,13 +231,12 @@ impl FilterSet { /// parsed successfully are ignored. Returns any discovered metadata about the list of rules /// added. pub fn add_filter_list(&mut self, filter_list: &str, opts: ParseOptions) -> FilterListMetadata { - let rules = filter_list.lines().map(str::to_string).collect::>(); - self.add_filters(&rules, opts) + self.add_filters(filter_list.lines(), opts) } /// Adds a collection of filter rules to this `FilterSet`. Filters that cannot be parsed /// successfully are ignored. Returns any discovered metadata about the list of rules added. - pub fn add_filters(&mut self, filters: &[String], opts: ParseOptions) -> FilterListMetadata { + pub fn add_filters(&mut self, filters: impl IntoIterator>, opts: ParseOptions) -> FilterListMetadata { let (metadata, mut parsed_network_filters, mut parsed_cosmetic_filters) = parse_filters_with_metadata(filters, self.debug, opts); self.network_filters.append(&mut parsed_network_filters); self.cosmetic_filters.append(&mut parsed_cosmetic_filters); @@ -258,7 +263,6 @@ impl FilterSet { /// This function will fail if the `FilterSet` was not created in debug mode. #[cfg(feature = "content-blocking")] pub fn into_content_blocking(self) -> Result<(Vec, Vec), ()> { - use std::convert::TryInto; use crate::content_blocking; if !self.debug { @@ -331,10 +335,14 @@ impl Default for FilterFormat { } } +/// Describes the type of a single filter. 
#[derive(Debug, PartialEq)] pub enum FilterType { + /// A network filter, used for changing the behavior of network requests Network, + /// A cosmetic filter, used for changing the behavior of fetched pages Cosmetic, + /// Something else that isn't supported NotSupported, } @@ -399,7 +407,7 @@ pub fn parse_filter( (FilterType::Network, RuleTypes::All | RuleTypes::NetworkOnly) => NetworkFilter::parse(filter, debug, opts) .map(|f| f.into()) .map_err(|e| e.into()), - (FilterType::Cosmetic, RuleTypes::All | RuleTypes::CosmeticOnly) => CosmeticFilter::parse(filter, debug) + (FilterType::Cosmetic, RuleTypes::All | RuleTypes::CosmeticOnly) => CosmeticFilter::parse(filter, debug, opts.permissions) .map(|f| f.into()) .map_err(|e| e.into()), _ => Err(FilterParseError::Unsupported), @@ -453,7 +461,7 @@ pub fn parse_filter( /// Parse an entire list of filters, ignoring any errors pub fn parse_filters( - list: &[String], + list: impl IntoIterator>, debug: bool, opts: ParseOptions, ) -> (Vec, Vec) { @@ -468,18 +476,18 @@ pub fn parse_filters( /// Parse an entire list of filters, ignoring any errors pub fn parse_filters_with_metadata( - list: &[String], + list: impl IntoIterator>, debug: bool, opts: ParseOptions, ) -> (FilterListMetadata, Vec, Vec) { let mut metadata = FilterListMetadata::default(); - let list_iter = list.iter(); + let list_iter = list.into_iter(); let (network_filters, cosmetic_filters): (Vec<_>, Vec<_>) = list_iter .map(|line| { - metadata.try_add(line); - parse_filter(line, debug, opts) + metadata.try_add(line.as_ref()); + parse_filter(line.as_ref(), debug, opts) }) .filter_map(Result::ok) .partition_map(|filter| match filter { @@ -687,18 +695,18 @@ mod tests { #[test] fn test_parsing_list_metadata() { let list = [ - "[Adblock Plus 2.0]".to_string(), - "! Title: 0131 Block List".to_string(), - "! Homepage: https://austinhuang.me/0131-block-list".to_string(), - "! Licence: https://creativecommons.org/licenses/by-sa/4.0/".to_string(), - "! 
Expires: 7 days".to_string(), - "! Version: 20220411".to_string(), - "".to_string(), - "! => https://austinhuang.me/0131-block-list/list.txt".to_string(), + "[Adblock Plus 2.0]", + "! Title: 0131 Block List", + "! Homepage: https://austinhuang.me/0131-block-list", + "! Licence: https://creativecommons.org/licenses/by-sa/4.0/", + "! Expires: 7 days", + "! Version: 20220411", + "", + "! => https://austinhuang.me/0131-block-list/list.txt", ]; let mut filter_set = FilterSet::new(false); - let metadata = filter_set.add_filters(&list[..], ParseOptions::default()); + let metadata = filter_set.add_filters(list, ParseOptions::default()); assert_eq!(metadata.title, Some("0131 Block List".to_string())); assert_eq!(metadata.homepage, Some("https://austinhuang.me/0131-block-list".to_string())); @@ -712,19 +720,19 @@ mod tests { /// Valid fields should still be recognized and parsed accordingly. fn test_parsing_list_best_effort() { let list = [ - "[Adblock Plus 2]".to_string(), - "!-----------------------------------".to_string(), - "! ABOUT".to_string(), - "!-----------------------------------".to_string(), - "! Version: 1.2.0.0".to_string(), - "! Title: ABPVN Advanced".to_string(), - "! Last modified: 09/03/2021".to_string(), - "! Expires: 7 days (update frequency)".to_string(), - "! Homepage: https://www.haopro.net/".to_string(), + "[Adblock Plus 2]", + "!-----------------------------------", + "! ABOUT", + "!-----------------------------------", + "! Version: 1.2.0.0", + "! Title: ABPVN Advanced", + "! Last modified: 09/03/2021", + "! Expires: 7 days (update frequency)", + "! 
Homepage: https://www.haopro.net/", ]; let mut filter_set = FilterSet::new(false); - let metadata = filter_set.add_filters(&list[..], ParseOptions::default()); + let metadata = filter_set.add_filters(list, ParseOptions::default()); assert_eq!(metadata.title, Some("ABPVN Advanced".to_string())); assert_eq!(metadata.homepage, Some("https://www.haopro.net/".to_string())); diff --git a/third_party/rust/adblock/v0_7/crate/src/optimizer.rs b/third_party/rust/adblock/v0_8/crate/src/optimizer.rs similarity index 91% rename from third_party/rust/adblock/v0_7/crate/src/optimizer.rs rename to third_party/rust/adblock/v0_8/crate/src/optimizer.rs index ee08996f701f..04fd7256046c 100644 --- a/third_party/rust/adblock/v0_7/crate/src/optimizer.rs +++ b/third_party/rust/adblock/v0_8/crate/src/optimizer.rs @@ -102,7 +102,7 @@ impl Optimization for SimplePatternGroup { } } - let is_regex = filters.iter().find(|f| f.is_regex()).is_some(); + let is_regex = filters.iter().any(NetworkFilter::is_regex); filter.mask.set(NetworkFilterMask::IS_REGEX, is_regex); let is_complete_regex = filters.iter().any(|f| f.is_complete_regex()); filter @@ -224,7 +224,7 @@ mod optimization_tests_pattern_group { url_path: &str, matches: bool, ) { - let is_match = filter.matches(&Request::from_urls( + let is_match = filter.matches(&Request::new( ("https://example.com/".to_string() + url_path).as_str(), "https://google.com", "" @@ -269,12 +269,12 @@ mod optimization_tests_pattern_group { #[test] fn combines_simple_regex_patterns() { - let rules = vec![ - String::from("/static/ad-"), - String::from("/static/ad."), - String::from("/static/ad/*"), - String::from("/static/ads/*"), - String::from("/static/adv/*"), + let rules = [ + "/static/ad-", + "/static/ad.", + "/static/ad/*", + "/static/ads/*", + "/static/adv/*", ]; let (filters, _) = lists::parse_filters(&rules, true, Default::default()); @@ -328,12 +328,12 @@ mod optimization_tests_pattern_group { #[test] fn separates_pattern_by_grouping() { - let rules = 
vec![ - String::from("/analytics-v1."), - String::from("/v1/pixel?"), - String::from("/api/v1/stat?"), - String::from("/analytics/v1/*$domain=~my.leadpages.net"), - String::from("/v1/ads/*"), + let rules = [ + "/analytics-v1.", + "/v1/pixel?", + "/api/v1/stat?", + "/analytics/v1/*$domain=~my.leadpages.net", + "/v1/ads/*", ]; let (filters, _) = lists::parse_filters(&rules, true, Default::default()); @@ -350,7 +350,7 @@ mod optimization_tests_pattern_group { ); assert!(filter.matches_test( - &Request::from_urls( + &Request::new( "https://example.com/v1/pixel?", "https://my.leadpages.net", "" @@ -366,7 +366,7 @@ mod optimization_tests_pattern_group { ); assert!(filter.matches_test( - &Request::from_urls( + &Request::new( "https://example.com/analytics/v1/foobar", "https://foo.leadpages.net", "" @@ -386,9 +386,9 @@ mod optimization_tests_union_domain { #[test] fn merges_domains() { - let rules = vec![ - String::from("/analytics-v1$domain=google.com"), - String::from("/analytics-v1$domain=example.com"), + let rules = [ + "/analytics-v1$domain=google.com", + "/analytics-v1$domain=example.com", ]; let (filters, _) = lists::parse_filters(&rules, true, Default::default()); @@ -414,7 +414,7 @@ mod optimization_tests_union_domain { assert!( filter.matches_test( - &Request::from_urls( + &Request::new( "https://example.com/analytics-v1/foobar", "https://google.com", "" @@ -424,7 +424,7 @@ mod optimization_tests_union_domain { ); assert!( filter.matches_test( - &Request::from_urls( + &Request::new( "https://example.com/analytics-v1/foobar", "https://foo.leadpages.net", "" @@ -436,10 +436,10 @@ mod optimization_tests_union_domain { #[test] fn skips_rules_with_no_domain() { - let rules = vec![ - String::from("/analytics-v1$domain=google.com"), - String::from("/analytics-v1$domain=example.com"), - String::from("/analytics-v1"), + let rules = [ + "/analytics-v1$domain=google.com", + "/analytics-v1$domain=example.com", + "/analytics-v1", ]; let (filters, _) = 
lists::parse_filters(&rules, true, Default::default()); @@ -453,11 +453,11 @@ mod optimization_tests_union_domain { #[test] fn optimises_domains() { - let rules = vec![ - String::from("/analytics-v1$domain=google.com"), - String::from("/analytics-v1$domain=example.com"), - String::from("/analytics-v1$domain=exampleone.com|exampletwo.com"), - String::from("/analytics-v1"), + let rules = [ + "/analytics-v1$domain=google.com", + "/analytics-v1$domain=example.com", + "/analytics-v1$domain=exampleone.com|exampletwo.com", + "/analytics-v1", ]; let (filters, _) = lists::parse_filters(&rules, true, Default::default()); @@ -479,7 +479,7 @@ mod optimization_tests_union_domain { assert!( filter.matches_test( - &Request::from_urls( + &Request::new( "https://example.com/analytics-v1/foobar", "https://google.com", "" @@ -489,7 +489,7 @@ mod optimization_tests_union_domain { ); assert!( filter.matches_test( - &Request::from_urls( + &Request::new( "https://example.com/analytics-v1/foobar", "https://example.com", "" @@ -499,7 +499,7 @@ mod optimization_tests_union_domain { ); assert!( filter.matches_test( - &Request::from_urls( + &Request::new( "https://example.com/analytics-v1/foobar", "https://exampletwo.com", "" @@ -509,7 +509,7 @@ mod optimization_tests_union_domain { ); assert!( filter.matches_test( - &Request::from_urls( + &Request::new( "https://example.com/analytics-v1/foobar", "https://foo.leadpages.net", "" diff --git a/third_party/rust/adblock/v0_7/crate/src/regex_manager.rs b/third_party/rust/adblock/v0_8/crate/src/regex_manager.rs similarity index 70% rename from third_party/rust/adblock/v0_7/crate/src/regex_manager.rs rename to third_party/rust/adblock/v0_8/crate/src/regex_manager.rs index 958aa2655926..6e65555eaf47 100644 --- a/third_party/rust/adblock/v0_7/crate/src/regex_manager.rs +++ b/third_party/rust/adblock/v0_8/crate/src/regex_manager.rs @@ -1,6 +1,6 @@ -//! A manager that creates/stores all regular expressions used by filters. -//! 
Rarely used entries could be discarded to save memory. -//! Non thread safe, the access must be synchronized externally. +//! Compiled regexes can take up large amounts of memory. To reduce the overall memory footprint of +//! the [`crate::Engine`], infrequently used regexes can be discarded. The [`RegexManager`] is +//! responsible for managing the storage of regexes used by filters. use crate::filters::network::{compile_regex, CompiledRegex, NetworkFilter}; @@ -31,10 +31,30 @@ unsafe impl Send for RegexManager {} const DEFAULT_CLEAN_UP_INTERVAL: Duration = Duration::from_secs(30); const DEFAULT_DISCARD_UNUSED_TIME: Duration = Duration::from_secs(180); +/// Reports [`RegexManager`] metrics that may be useful for creating an optimized +/// [`RegexManagerDiscardPolicy`]. +#[cfg(feature = "regex-debug-info")] +pub struct RegexDebugInfo { + /// Information about each regex contained in the [`RegexManager`]. + pub regex_data: Vec, + /// Total count of compiled regexes. + pub compiled_regex_count: usize, +} + +/// Describes metrics about a single regex from the [`RegexManager`]. +#[cfg(feature = "regex-debug-info")] pub struct RegexDebugEntry { + /// Id for this particular regex, which is constant and unique for its lifetime. + /// + /// Note that there are no guarantees about a particular id's constancy or uniqueness beyond + /// the lifetime of a corresponding regex. pub id: u64, + /// A string representation of this regex, if available. It may be `None` if the regex has been + /// cleaned up to conserve memory. pub regex: Option, + /// When this regex was last used. pub last_used: Instant, + /// How many times this regex has been used. pub usage_count: usize, } @@ -44,8 +64,11 @@ struct RegexEntry { usage_count: usize, } +/// Used for customization of regex discarding behavior in the [`RegexManager`]. pub struct RegexManagerDiscardPolicy { + /// The [`RegexManager`] will check for and clean up unused filters on this interval.
pub cleanup_interval: Duration, + /// The [`RegexManager`] will discard a regex if it hasn't been used for this much time. pub discard_unused_time: Duration, } @@ -60,10 +83,15 @@ impl Default for RegexManagerDiscardPolicy { type RandomState = std::hash::BuildHasherDefault; +/// A manager that creates and stores all regular expressions used by filters. +/// Rarely used entries are discarded to save memory. +/// +/// The [`RegexManager`] is not thread safe, so any access to it must be synchronized externally. pub struct RegexManager { map: HashMap<*const NetworkFilter, RegexEntry, RandomState>, compiled_regex_count: usize, now: Instant, + #[cfg_attr(target_arch = "wasm32", allow(unused))] last_cleanup: Instant, discard_policy: RegexManagerDiscardPolicy, } @@ -90,6 +118,8 @@ fn make_regexp(filter: &NetworkFilter) -> CompiledRegex { } impl RegexManager { + /// Check whether or not a regex network filter matches a certain URL pattern, using the + /// [`RegexManager`]'s managed regex storage. pub fn matches(&mut self, filter: &NetworkFilter, pattern: &str) -> bool { if !filter.is_regex() && !filter.is_complete_regex() { return true; @@ -125,6 +155,9 @@ impl RegexManager { }; } + /// The [`RegexManager`] is just a struct and doesn't manage any worker threads, so this method + /// must be called periodically to ensure that it can track usage patterns of regexes over + /// time. This method will handle periodically discarding filters if necessary. #[cfg(not(target_arch = "wasm32"))] pub fn update_time(&mut self) { self.now = Instant::now(); @@ -137,7 +170,7 @@ impl RegexManager { } #[cfg(not(target_arch = "wasm32"))] - pub fn cleanup(&mut self) { + pub(crate) fn cleanup(&mut self) { let now = self.now; for v in self.map.values_mut() { if now - v.last_used >= self.discard_policy.discard_unused_time { @@ -147,11 +180,13 @@ impl RegexManager { } } + /// Customize the discard behavior of this [`RegexManager`]. 
pub fn set_discard_policy(&mut self, new_discard_policy: RegexManagerDiscardPolicy) { self.discard_policy = new_discard_policy; } - #[cfg(feature = "debug-info")] + /// Discard one regex, identified by its id from a [`RegexDebugEntry`]. + #[cfg(feature = "regex-debug-info")] pub fn discard_regex(&mut self, regex_id: u64) { self.map .iter_mut() @@ -161,8 +196,8 @@ impl RegexManager { }); } - #[cfg(feature = "debug-info")] - pub fn get_debug_regex_data(&self) -> Vec { + #[cfg(feature = "regex-debug-info")] + pub(crate) fn get_debug_regex_data(&self) -> Vec { use itertools::Itertools; self.map .iter() @@ -175,13 +210,22 @@ impl RegexManager { .collect_vec() } - #[cfg(feature = "debug-info")] - pub fn get_compiled_regex_count(&self) -> usize { + #[cfg(feature = "regex-debug-info")] + pub(crate) fn get_compiled_regex_count(&self) -> usize { self.compiled_regex_count } + + /// Collect metrics that may be useful for creating an optimized [`RegexManagerDiscardPolicy`]. + #[cfg(feature = "regex-debug-info")] + pub fn get_debug_info(&self) -> RegexDebugInfo { + RegexDebugInfo { + regex_data: self.get_debug_regex_data(), + compiled_regex_count: self.get_compiled_regex_count(), + } + } } -#[cfg(all(test, feature = "debug-info"))] +#[cfg(all(test, feature = "regex-debug-info"))] mod tests { use super::*; @@ -195,7 +239,7 @@ mod tests { } fn make_request(url: &str) -> request::Request { - request::Request::from_url(url).unwrap() + request::Request::new(url, "https://example.com", "other").unwrap() } fn get_active_regex_count(regex_manager: &RegexManager) -> usize { diff --git a/third_party/rust/adblock/v0_7/crate/src/request.rs b/third_party/rust/adblock/v0_8/crate/src/request.rs similarity index 68% rename from third_party/rust/adblock/v0_7/crate/src/request.rs rename to third_party/rust/adblock/v0_8/crate/src/request.rs index f58505655424..89d29842ea14 100644 --- a/third_party/rust/adblock/v0_7/crate/src/request.rs +++ b/third_party/rust/adblock/v0_8/crate/src/request.rs @@ 
-2,9 +2,12 @@ use std::borrow::Cow; +use thiserror::Error; + use crate::url_parser; use crate::utils; +/// The type of resource requested from the URL endpoint. #[derive(Clone, PartialEq, Debug)] pub enum RequestType { Beacon, @@ -26,10 +29,14 @@ pub enum RequestType { Xmlhttprequest, } -#[derive(Debug, PartialEq)] +/// Possible failure reasons when creating a [`Request`]. +#[derive(Debug, Error, PartialEq)] pub enum RequestError { + #[error("hostname parsing failed")] HostnameParseError, + #[error("source hostname parsing failed")] SourceHostnameParseError, + #[error("invalid Unicode provided")] UnicodeDecodingError, } @@ -70,6 +77,7 @@ fn cpt_match_type(cpt: &str) -> RequestType { } } +/// A network [`Request`], used as an interface for network blocking in the [`crate::Engine`]. #[derive(Clone, Debug)] pub struct Request { pub request_type: RequestType, @@ -77,16 +85,12 @@ pub struct Request { pub is_http: bool, pub is_https: bool, pub is_supported: bool, - pub is_first_party: Option, - pub is_third_party: Option, + pub is_third_party: bool, pub url: String, pub hostname: String, pub source_hostname_hashes: Option>, pub(crate) original_url: String, - - // mutable fields, set later - hostname_end: usize, } impl Request { @@ -105,41 +109,6 @@ impl Request { token_buffer.push(0); } - pub fn url_after_hostname(&self) -> &str { - &self.url[self.hostname_end..] 
- } - - pub fn new( - raw_type: &str, - url: &str, - schema: &str, - hostname: &str, - domain: &str, - source_hostname: &str, - source_domain: &str, - ) -> Request { - let third_party = if source_domain.is_empty() { - None - } else { - Some(source_domain != domain) - }; - - let hostname_end = memchr::memmem::find(url.as_bytes(), hostname.as_bytes()) - .unwrap_or(url.len()) - + hostname.len(); - - Self::from_detailed_parameters( - raw_type, - url, - schema, - hostname, - source_hostname, - third_party, - hostname_end, - url.to_string(), - ) - } - #[allow(clippy::too_many_arguments)] fn from_detailed_parameters( raw_type: &str, @@ -147,12 +116,9 @@ impl Request { schema: &str, hostname: &str, source_hostname: &str, - third_party: Option, - hostname_end: usize, + third_party: bool, original_url: String, ) -> Request { - let first_party = third_party.map(|p| !p); - let is_http: bool; let is_https: bool; let is_supported: bool; @@ -195,17 +161,16 @@ impl Request { url: url.to_owned(), hostname: hostname.to_owned(), source_hostname_hashes, - is_first_party: first_party, is_third_party: third_party, is_http, is_https, is_supported, - hostname_end, original_url, } } - pub fn from_urls( + /// Construct a new [`Request`]. 
+ pub fn new( url: &str, source_url: &str, request_type: &str, @@ -214,11 +179,7 @@ impl Request { if let Some(parsed_source) = url_parser::parse_url(source_url) { let source_domain = parsed_source.domain(); - let third_party = if source_domain.is_empty() { - None - } else { - Some(source_domain != parsed_url.domain()) - }; + let third_party = source_domain != parsed_url.domain(); Ok(Request::from_detailed_parameters( request_type, @@ -227,7 +188,6 @@ impl Request { parsed_url.hostname(), parsed_source.hostname(), third_party, - parsed_url.hostname_pos.1, url.to_string(), )) } else { @@ -237,8 +197,7 @@ impl Request { parsed_url.schema(), parsed_url.hostname(), "", - None, - parsed_url.hostname_pos.1, + true, url.to_string(), )) } @@ -247,33 +206,19 @@ impl Request { } } - pub fn from_urls_with_hostname( + /// If you're building a [`Request`] in a context that already has access to parsed + /// representations of the input URLs, you can use this constructor to avoid extra lookups from + /// the public suffix list. Take care to pass data correctly. 
+ pub fn preparsed( url: &str, hostname: &str, source_hostname: &str, request_type: &str, - third_party_request: Option, + third_party: bool, ) -> Request { let splitter = memchr::memchr(b':', url.as_bytes()).unwrap_or(0); let schema: &str = &url[..splitter]; - let third_party = if third_party_request.is_none() { - let (domain_start, domain_end) = url_parser::get_host_domain(hostname); - let domain = &hostname[domain_start..domain_end]; - - let (source_domain_start, source_domain_end) = - url_parser::get_host_domain(source_hostname); - let source_domain = &source_hostname[source_domain_start..source_domain_end]; - - if source_domain.is_empty() { - None - } else { - Some(source_domain != domain) - } - } else { - third_party_request - }; - Request::from_detailed_parameters( request_type, url, @@ -281,24 +226,40 @@ impl Request { hostname, source_hostname, third_party, - splitter + 2 + hostname.len(), url.to_string(), ) } - - pub fn from_url(url: &str) -> Result { - // Used in testing - assume empty source_url and default request type - Self::from_urls(url, "", "") - } } #[cfg(test)] mod tests { use super::*; + fn build_request( + raw_type: &str, + url: &str, + schema: &str, + hostname: &str, + domain: &str, + source_hostname: &str, + source_domain: &str, + ) -> Request { + let third_party = source_domain != domain; + + Request::from_detailed_parameters( + raw_type, + url, + schema, + hostname, + source_hostname, + third_party, + url.to_string(), + ) + } + #[test] fn new_works() { - let simple_example = Request::new( + let simple_example = build_request( "document", "https://example.com/ad", "https", @@ -309,8 +270,7 @@ mod tests { ); assert_eq!(simple_example.is_https, true); assert_eq!(simple_example.is_supported, true); - assert_eq!(simple_example.is_first_party, Some(true)); - assert_eq!(simple_example.is_third_party, Some(false)); + assert_eq!(simple_example.is_third_party, false); assert_eq!(simple_example.request_type, RequestType::Document); assert_eq!( 
simple_example.source_hostname_hashes, @@ -320,7 +280,7 @@ mod tests { ]), ); - let unsupported_example = Request::new( + let unsupported_example = build_request( "document", "file://example.com/ad", "file", @@ -333,7 +293,7 @@ mod tests { assert_eq!(unsupported_example.is_http, false); assert_eq!(unsupported_example.is_supported, false); - let first_party = Request::new( + let first_party = build_request( "document", "https://subdomain.example.com/ad", "https", @@ -344,10 +304,9 @@ mod tests { ); assert_eq!(first_party.is_https, true); assert_eq!(first_party.is_supported, true); - assert_eq!(first_party.is_first_party, Some(true)); - assert_eq!(first_party.is_third_party, Some(false)); + assert_eq!(first_party.is_third_party, false); - let third_party = Request::new( + let third_party = build_request( "document", "https://subdomain.anotherexample.com/ad", "https", @@ -358,10 +317,9 @@ mod tests { ); assert_eq!(third_party.is_https, true); assert_eq!(third_party.is_supported, true); - assert_eq!(third_party.is_first_party, Some(false)); - assert_eq!(third_party.is_third_party, Some(true)); + assert_eq!(third_party.is_third_party, true); - let websocket = Request::new( + let websocket = build_request( "document", "wss://subdomain.anotherexample.com/ad", "wss", @@ -373,11 +331,10 @@ mod tests { assert_eq!(websocket.is_https, false); assert_eq!(websocket.is_https, false); assert_eq!(websocket.is_supported, true); - assert_eq!(websocket.is_first_party, Some(false)); - assert_eq!(websocket.is_third_party, Some(true)); + assert_eq!(websocket.is_third_party, true); assert_eq!(websocket.request_type, RequestType::Websocket); - let assumed_https = Request::new( + let assumed_https = build_request( "document", "//subdomain.anotherexample.com/ad", "", @@ -399,7 +356,7 @@ mod tests { #[test] fn tokens_works() { - let simple_example = Request::new( + let simple_example = build_request( "document", "https://subdomain.example.com/ad", "https", @@ -426,7 +383,7 @@ mod tests { 
#[test] fn parses_urls() { - let parsed = Request::from_urls( + let parsed = Request::new( "https://subdomain.example.com/ad", "https://example.com/", "document", @@ -434,8 +391,7 @@ mod tests { .unwrap(); assert_eq!(parsed.is_https, true); assert_eq!(parsed.is_supported, true); - assert_eq!(parsed.is_first_party, Some(true)); - assert_eq!(parsed.is_third_party, Some(false)); + assert_eq!(parsed.is_third_party, false); assert_eq!(parsed.request_type, RequestType::Document); // assert_eq!(parsed.domain, "example.com"); @@ -451,7 +407,7 @@ mod tests { ); // assert_eq!(parsed.source_hostname, "example.com"); - let bad_url = Request::from_urls( + let bad_url = Request::new( "subdomain.example.com/ad", "https://example.com/", "document", @@ -459,76 +415,17 @@ mod tests { assert_eq!(bad_url.err(), Some(RequestError::HostnameParseError)); } - #[test] - fn handles_explicit_third_party_param() { - { - // domain matches - let parsed = Request::from_urls_with_hostname( - "https://subdomain.example.com/ad", - "subdomain.example.com", - "example.com", - "document", - None, - ); - assert_eq!(parsed.is_third_party, Some(false)); - } - { - // domain does not match - let parsed = Request::from_urls_with_hostname( - "https://subdomain.example.com/ad", - "subdomain.example.com", - "anotherexample.com", - "document", - None, - ); - assert_eq!(parsed.is_third_party, Some(true)); - } - { - // cannot parse domain - let parsed = Request::from_urls_with_hostname( - "https://subdomain.example.com/ad", - "subdomain.example.com", - "", - "document", - None, - ); - assert_eq!(parsed.is_third_party, None); - } - { - // third-partiness set to false - let parsed = Request::from_urls_with_hostname( - "https://subdomain.example.com/ad", - "subdomain.example.com", - "example.com", - "document", - Some(true), - ); - assert_eq!(parsed.is_third_party, Some(true)); - } - { - // third-partiness set to true - let parsed = Request::from_urls_with_hostname( - "https://subdomain.example.com/ad", - 
"subdomain.example.com", - "anotherexample.com", - "document", - Some(false), - ); - assert_eq!(parsed.is_third_party, Some(false)); - } - } - #[test] fn fuzzing_errors() { { - let parsed = Request::from_url("https://߶"); + let parsed = Request::new("https://߶", "https://example.com", "other"); assert!(parsed.is_ok()); } { - let parsed = Request::from_url(&format!( + let parsed = Request::new(&format!( "https://{}", std::str::from_utf8(&[9, 9, 64]).unwrap() - )); + ), "https://example.com", "other"); assert!(parsed.is_err()); } } diff --git a/third_party/rust/adblock/v0_8/crate/src/resources/mod.rs b/third_party/rust/adblock/v0_8/crate/src/resources/mod.rs new file mode 100644 index 000000000000..a84ea43e0a37 --- /dev/null +++ b/third_party/rust/adblock/v0_8/crate/src/resources/mod.rs @@ -0,0 +1,380 @@ +//! In adblocking terms, [`Resource`]s are special placeholder scripts, images, +//! video files, etc. that can be returned as drop-in replacements for harmful +//! equivalents from remote servers. Resources also encompass scriptlets, which +//! can be injected into pages to inhibit malicious behavior. +//! +//! If the `resource-assembler` feature is enabled, the +#![cfg_attr(not(feature = "resource-assembler"), doc="`resource_assembler`")] +#![cfg_attr(feature = "resource-assembler", doc="[`resource_assembler`]")] +//! module will assist with the construction of [`Resource`]s directly from the uBlock Origin +//! project. + +#[cfg(feature = "resource-assembler")] +pub mod resource_assembler; + +mod resource_storage; +#[doc(inline)] +pub use resource_storage::{AddResourceError, ResourceStorage, ScriptletResourceError}; + +use memchr::memrchr as find_char_reverse; +use serde::{Deserialize, Serialize}; + +/// Specifies a set of permissions required to inject a scriptlet resource. +/// +/// Permissions can be specified when parsing individual lists using [`crate::FilterSet`] in +/// order to propagate the permission level to all filters contained in the list. 
+/// +/// In practice, permissions are used to limit the risk of third-party lists having access to +/// powerful scriptlets like uBlock Origin's `trusted-set-cookie`, which has the ability to set +/// arbitrary cookies to arbitrary values on visited sites. +/// +/// ### Example +/// +/// ``` +/// # use adblock::Engine; +/// # use adblock::lists::ParseOptions; +/// # use adblock::resources::{MimeType, PermissionMask, Resource, ResourceType}; +/// # let mut filter_set = adblock::lists::FilterSet::default(); +/// # let untrusted_filters = vec![""]; +/// # let trusted_filters = vec![""]; +/// const COOKIE_ACCESS: PermissionMask = PermissionMask::from_bits(0b00000001); +/// const LOCALSTORAGE_ACCESS: PermissionMask = PermissionMask::from_bits(0b00000010); +/// +/// // `untrusted_filters` will not be able to use privileged scriptlet injections. +/// filter_set.add_filters( +/// untrusted_filters, +/// Default::default(), +/// ); +/// // `trusted_filters` will be able to inject scriptlets requiring `COOKIE_ACCESS` +/// // permissions or `LOCALSTORAGE_ACCESS` permissions. +/// filter_set.add_filters( +/// trusted_filters, +/// ParseOptions { +/// permissions: COOKIE_ACCESS | LOCALSTORAGE_ACCESS, +/// ..Default::default() +/// }, +/// ); +/// +/// let mut engine = Engine::from_filter_set(filter_set, true); +/// // The `trusted-set-cookie` scriptlet cannot be injected without `COOKIE_ACCESS` +/// // permission. 
+/// engine.add_resource(Resource { +/// name: "trusted-set-cookie.js".to_string(), +/// aliases: vec![], +/// kind: ResourceType::Mime(MimeType::ApplicationJavascript), +/// content: base64::encode("document.cookie = '...';"), +/// dependencies: vec![], +/// permission: COOKIE_ACCESS, +/// }); +/// ``` +#[derive(Serialize, Deserialize, Clone, Copy, Default)] +#[repr(transparent)] +#[serde(transparent)] +pub struct PermissionMask(u8); + +impl std::fmt::Debug for PermissionMask { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "PermissionMask({:b})", self.0) + } +} + +impl core::ops::BitOr for PermissionMask { + type Output = PermissionMask; + + fn bitor(self, rhs: PermissionMask) -> Self::Output { + Self(self.0 | rhs.0) + } +} + +impl core::ops::BitOrAssign for PermissionMask { + fn bitor_assign(&mut self, rhs: PermissionMask) { + self.0 |= rhs.0; + } +} + +impl PermissionMask { + /// Construct a new [`PermissionMask`] with the given bitmask. Use + /// [`PermissionMask::default()`] instead if you don't want to restrict or grant any + /// permissions. + pub const fn from_bits(bits: u8) -> Self { + Self(bits) + } + + /// Can `filter_mask` authorize injecting a resource requiring `self` permissions? + pub fn is_injectable_by(&self, filter_mask: PermissionMask) -> bool { + // For any particular bit index, the scriptlet is injectable if: + // (there is a requirement, AND the filter meets it) OR (there's no requirement) + // in other words: + // (self & filter_mask) | (!self) == 1 + // (self | !self) & (filter_mask | !self) == 1 + // filter_mask | !self == 1 + // !(filter_mask | !self) == 0 + // !filter_mask & self == 0 + // which we can compare across *all* bits using bitwise operations, hence: + !filter_mask.0 & self.0 == 0 + } + + /// The default value for [`PermissionMask`] is one which provides no additional permissions. 
+ fn is_default(&self) -> bool { + self.0 == 0 + } +} + +/// Struct representing a resource that can be used by an adblocking engine. +#[derive(Serialize, Deserialize, Clone)] +pub struct Resource { + /// Represents the primary name of the resource, often a filename + pub name: String, + /// Represents secondary names that can be used to access the resource + #[serde(default)] + pub aliases: Vec, + /// How to interpret the resource data within `content` + pub kind: ResourceType, + /// The resource data, encoded using standard base64 configuration + pub content: String, + /// Optionally contains the name of any dependencies used by this resource. Currently, this + /// only applies to `application/javascript` and `fn/javascript` MIME types. + /// + /// Aliases should never be added to this list. It should only contain primary/canonical + /// resource names. + /// + /// Currently ignored, but will be respected in a future release. Bundle any required + /// dependencies inside the resource for now. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub dependencies: Vec, + /// Optionally defines permission levels required to use this resource for a scriptlet + /// injection. See [`PermissionMask`] for more details. + /// + /// If there is any customized permission, this resource cannot be used for redirects. + /// + /// This field is similar to the `requiresTrust` field from uBlock Origin's scriptlet + /// resources, except that it supports up to 8 different trust "domains". + #[serde(default, skip_serializing_if = "PermissionMask::is_default")] + pub permission: PermissionMask, +} + +impl Resource { + /// Convenience constructor for tests. Creates a new [`Resource`] with no aliases or + /// dependencies. Content will be automatically base64-encoded by the constructor. 
+ #[cfg(test)] + pub fn simple(name: &str, kind: MimeType, content: &str) -> Self { + Self { + name: name.to_string(), + aliases: vec![], + kind: ResourceType::Mime(kind), + content: base64::encode(content), + dependencies: vec![], + permission: Default::default(), + } + } +} + +/// Different ways that the data within the `content` field of a `Resource` can be interpreted. +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum ResourceType { + /// Interpret the data according to the MIME type represented by `type` + Mime(MimeType), + /// Interpret the data as a Javascript scriptlet template, with embedded template + /// parameters in the form of `{{1}}`, `{{2}}`, etc. Note that `Mime(ApplicationJavascript)` + /// can still be used as a templated resource, for compatibility purposes. + Template, +} + +impl ResourceType { + /// Can resources of this type be used as network redirects? + pub fn supports_redirect(&self) -> bool { + !matches!(self, ResourceType::Template | ResourceType::Mime(MimeType::FnJavascript)) + } + + /// Can resources of this type be used for scriptlet injections? + pub fn supports_scriptlet_injection(&self) -> bool { + matches!(self, ResourceType::Template | ResourceType::Mime(MimeType::ApplicationJavascript)) + } +} + +/// Acceptable MIME types for resources used by `$redirect` and `+js(...)` adblock rules. +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] +#[serde(into = "&str")] +#[serde(from = "std::borrow::Cow<'static, str>")] +pub enum MimeType { + /// `"text/css"` + TextCss, + /// `"image/gif"` + ImageGif, + /// `"text/html"` + TextHtml, + /// `"application/javascript"` + ApplicationJavascript, + /// `"audio/mp3"` + AudioMp3, + /// `"video/mp4"` + VideoMp4, + /// `"image/png"` + ImagePng, + /// `"text/plain"` + TextPlain, + /// `"text/xml"` + TextXml, + /// Custom MIME type invented for the uBlock Origin project. Represented by `"fn/javascript"`. 
+ /// Used to describe JavaScript functions that can be used as dependencies of other JavaScript + /// resources. + FnJavascript, + /// Any other unhandled MIME type. Maps to `"application/octet-stream"` when re-serialized. + Unknown, +} + +impl MimeType { + /// Infers a resource's MIME type according to the extension of its path + pub fn from_extension(resource_path: &str) -> Self { + if let Some(extension_index) = find_char_reverse(b'.', resource_path.as_bytes()) { + match &resource_path[extension_index + 1..] { + "css" => MimeType::TextCss, + "gif" => MimeType::ImageGif, + "html" => MimeType::TextHtml, + "js" => MimeType::ApplicationJavascript, + "mp3" => MimeType::AudioMp3, + "mp4" => MimeType::VideoMp4, + "png" => MimeType::ImagePng, + "txt" => MimeType::TextPlain, + "xml" => MimeType::TextXml, + _ => { + #[cfg(test)] + eprintln!("Unrecognized file extension on: {:?}", resource_path); + MimeType::Unknown + } + } + } else { + MimeType::Unknown + } + } + + /// Should the MIME type decode as valid UTF8? + pub fn is_textual(&self) -> bool { + matches!( + self, + Self::ApplicationJavascript + | Self::FnJavascript + | Self::TextCss + | Self::TextPlain + | Self::TextHtml + | Self::TextXml + ) + } + + /// Can the MIME type have dependencies on other resources? 
+ pub fn supports_dependencies(&self) -> bool { + matches!(self, Self::ApplicationJavascript | Self::FnJavascript) + } +} + +impl From<&str> for MimeType { + fn from(v: &str) -> Self { + match v { + "text/css" => MimeType::TextCss, + "image/gif" => MimeType::ImageGif, + "text/html" => MimeType::TextHtml, + "application/javascript" => MimeType::ApplicationJavascript, + "audio/mp3" => MimeType::AudioMp3, + "video/mp4" => MimeType::VideoMp4, + "image/png" => MimeType::ImagePng, + "text/plain" => MimeType::TextPlain, + "text/xml" => MimeType::TextXml, + "fn/javascript" => MimeType::FnJavascript, + _ => MimeType::Unknown, + } + } +} + +impl From<&MimeType> for &str { + fn from(v: &MimeType) -> Self { + match v { + MimeType::TextCss => "text/css", + MimeType::ImageGif => "image/gif", + MimeType::TextHtml => "text/html", + MimeType::ApplicationJavascript => "application/javascript", + MimeType::AudioMp3 => "audio/mp3", + MimeType::VideoMp4 => "video/mp4", + MimeType::ImagePng => "image/png", + MimeType::TextPlain => "text/plain", + MimeType::TextXml => "text/xml", + MimeType::FnJavascript => "fn/javascript", + MimeType::Unknown => "application/octet-stream", + } + } +} + +// Required for `#[serde(from = "std::borrow::Cow<'static, str>")]` +impl From> for MimeType { + fn from(v: std::borrow::Cow<'static, str>) -> Self { + v.as_ref().into() + } +} + +// Required for `#[serde(into = &str)]` +impl From for &str { + fn from(v: MimeType) -> Self { + (&v).into() + } +} + +impl std::fmt::Display for MimeType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let s: &str = self.into(); + write!(f, "{}", s) + } +} + +#[cfg(test)] +mod permission_tests { + use super::*; + + #[test] + fn test_permissions() { + { + let resource = PermissionMask(0b00000000); + assert!(resource.is_injectable_by(PermissionMask(0b00000000))); + assert!(resource.is_injectable_by(PermissionMask(0b00000001))); + assert!(resource.is_injectable_by(PermissionMask(0b00000010))); + 
assert!(resource.is_injectable_by(PermissionMask(0b00000011))); + assert!(resource.is_injectable_by(PermissionMask(0b10000000))); + assert!(resource.is_injectable_by(PermissionMask(0b11111111))); + } + { + let resource = PermissionMask(0b00000001); + assert!(!resource.is_injectable_by(PermissionMask(0b00000000))); + assert!(resource.is_injectable_by(PermissionMask(0b00000001))); + assert!(!resource.is_injectable_by(PermissionMask(0b00000010))); + assert!(resource.is_injectable_by(PermissionMask(0b00000011))); + assert!(!resource.is_injectable_by(PermissionMask(0b10000000))); + assert!(resource.is_injectable_by(PermissionMask(0b11111111))); + } + { + let resource = PermissionMask(0b00000010); + assert!(!resource.is_injectable_by(PermissionMask(0b00000000))); + assert!(!resource.is_injectable_by(PermissionMask(0b00000001))); + assert!(resource.is_injectable_by(PermissionMask(0b00000010))); + assert!(resource.is_injectable_by(PermissionMask(0b00000011))); + assert!(!resource.is_injectable_by(PermissionMask(0b10000000))); + assert!(resource.is_injectable_by(PermissionMask(0b11111111))); + } + { + let resource = PermissionMask(0b00000011); + assert!(!resource.is_injectable_by(PermissionMask(0b00000000))); + assert!(!resource.is_injectable_by(PermissionMask(0b00000001))); + assert!(!resource.is_injectable_by(PermissionMask(0b00000010))); + assert!(resource.is_injectable_by(PermissionMask(0b00000011))); + assert!(!resource.is_injectable_by(PermissionMask(0b10000000))); + assert!(resource.is_injectable_by(PermissionMask(0b11111111))); + } + { + let resource = PermissionMask(0b10000011); + assert!(!resource.is_injectable_by(PermissionMask(0b00000000))); + assert!(!resource.is_injectable_by(PermissionMask(0b00000001))); + assert!(!resource.is_injectable_by(PermissionMask(0b00000010))); + assert!(!resource.is_injectable_by(PermissionMask(0b00000011))); + assert!(!resource.is_injectable_by(PermissionMask(0b10000000))); + 
assert!(resource.is_injectable_by(PermissionMask(0b11111111))); + } + } +} diff --git a/third_party/rust/adblock/v0_7/crate/src/resources/resource_assembler.rs b/third_party/rust/adblock/v0_8/crate/src/resources/resource_assembler.rs similarity index 97% rename from third_party/rust/adblock/v0_7/crate/src/resources/resource_assembler.rs rename to third_party/rust/adblock/v0_8/crate/src/resources/resource_assembler.rs index e6e97687793c..e252e6fa82e0 100644 --- a/third_party/rust/adblock/v0_7/crate/src/resources/resource_assembler.rs +++ b/third_party/rust/adblock/v0_8/crate/src/resources/resource_assembler.rs @@ -13,17 +13,15 @@ static TOP_COMMENT_RE: Lazy = Lazy::new(|| Regex::new(r#"^/\*[\S\s]+?\n\* static NON_EMPTY_LINE_RE: Lazy = Lazy::new(|| Regex::new(r#"\S"#).unwrap()); /// Represents a single entry of the `Map` from uBlock Origin's `redirect-resources.js`. -/// -/// - `name` is the name of a resource, corresponding to its path in the `web_accessible_resources` -/// directory -/// -/// - `alias` is a list of optional additional names that can be used to reference the resource -/// -/// - `data` is either `"text"` or `"blob"`, but is currently unused in `adblock-rust`. Within -/// uBlock Origin, it's used to prevent text files from being encoded in base64 in a data URL. struct ResourceProperties { + /// The name of a resource, corresponding to its path in the `web_accessible_resources` + /// directory name: String, + /// A list of optional additional names that can be used to reference the resource alias: Vec, + /// Either `"text"` or `"blob"`, but is currently unused in `adblock-rust`. Within uBlock + /// Origin, it's used to prevent text files from being encoded in base64 in a data URL. 
+ #[allow(unused)] data: Option, } @@ -196,6 +194,8 @@ fn read_template_resources(scriptlets_data: &str) -> Vec { .unwrap_or_default(), kind, content: base64::encode(&script), + dependencies: vec![], + permission: Default::default(), }); name = None; @@ -232,6 +232,8 @@ fn build_resource_from_file_contents( aliases, kind: ResourceType::Mime(mimetype), content, + dependencies: vec![], + permission: Default::default(), } } diff --git a/third_party/rust/adblock/v0_8/crate/src/resources/resource_storage.rs b/third_party/rust/adblock/v0_8/crate/src/resources/resource_storage.rs new file mode 100644 index 000000000000..128a13e14a11 --- /dev/null +++ b/third_party/rust/adblock/v0_8/crate/src/resources/resource_storage.rs @@ -0,0 +1,623 @@ +//! Storage and retrieval for redirect and scriptlet resources. + +use std::collections::HashMap; + +use once_cell::sync::Lazy; +use regex::Regex; +use thiserror::Error; + +use super::{MimeType, PermissionMask, Resource, ResourceType}; + +/// Unified resource storage for both redirects and scriptlets. +#[derive(Default)] +pub struct ResourceStorage { + /// Stores each resource by its canonical name + resources: HashMap, + /// Stores mappings from aliases to their canonical resource names + aliases: HashMap, +} + +impl ResourceStorage { + /// Convenience constructor that allows building storage for many resources at once. Errors are + /// silently consumed. + pub fn from_resources(resources: impl IntoIterator) -> Self { + let mut self_ = Self::default(); + + resources.into_iter().for_each(|resource| { + self_ + .add_resource(resource) + .unwrap_or_else(|_e| { + #[cfg(test)] + eprintln!("Failed to add resource: {:?}", _e) + }) + }); + + self_ + } + + /// Adds a resource to storage so that it can be retrieved later. 
+ pub fn add_resource(&mut self, resource: Resource) -> Result<(), AddResourceError> { + if let ResourceType::Mime(content_type) = &resource.kind { + if matches!(content_type, MimeType::FnJavascript) { + return Err(AddResourceError::FnJavascriptNotSupported); + } + + if !resource.dependencies.is_empty() && !content_type.supports_dependencies() { + return Err(AddResourceError::ContentTypeDoesNotSupportDependencies); + } + + // Ensure the resource contents are valid base64 (and utf8 if applicable) + let decoded = base64::decode(&resource.content)?; + if content_type.is_textual() { + let _ = String::from_utf8(decoded)?; + } + } + + for ident in std::iter::once(&resource.name).chain(resource.aliases.iter()) { + if self.resources.contains_key(ident) || self.aliases.contains_key(ident) { + return Err(AddResourceError::NameAlreadyAdded); + } + } + + resource.aliases.iter().for_each(|alias| { + self.aliases.insert(alias.clone(), resource.name.clone()); + }); + self.resources.insert(resource.name.clone(), resource); + + Ok(()) + } + + /// Given the contents of a `+js(...)` filter part, return a scriptlet string appropriate for + /// injection in a page. 
+ pub fn get_scriptlet_resource(&self, scriptlet_args: &str, filter_permission: PermissionMask) -> Result { + let scriptlet_args = parse_scriptlet_args(scriptlet_args); + if scriptlet_args.is_empty() { + return Err(ScriptletResourceError::MissingScriptletName); + } + + let scriptlet_name = with_js_extension(scriptlet_args[0].as_ref()); + + let args = &scriptlet_args[1..]; + if args.len() == 1 && args[0].starts_with('{') && args[0].ends_with('}') { + return Err(ScriptletResourceError::ScriptletArgObjectSyntaxUnsupported); + } + + let resource = self + .get_internal_resource(&scriptlet_name) + .ok_or(ScriptletResourceError::NoMatchingScriptlet)?; + + if !resource.permission.is_injectable_by(filter_permission) { + return Err(ScriptletResourceError::InsufficientPermissions); + } + + if !resource.kind.supports_scriptlet_injection() { + return Err(ScriptletResourceError::ContentTypeNotInjectable); + } + + let template = String::from_utf8(base64::decode(&resource.content)?)?; + + if template.starts_with("function") { + // newer function-style resource: pass args using function call syntax + use itertools::Itertools as _; + Ok(format!("({})({})", template, args.iter().map(|a| format!("'{}'", a)).join(", "))) + } else { + // older template-style resource: replace first instances with args + Ok(patch_template_scriptlet(template, args)) + } + } + + /// Get a data-URL formatted resource appropriate for a `$redirect` response. + pub fn get_redirect_resource(&self, resource_ident: &str) -> Option { + let resource = self.get_internal_resource(resource_ident); + + resource.and_then(|resource| { + if !resource.permission.is_default() { + return None; + } + if !resource.kind.supports_redirect() { + return None; + } + if let ResourceType::Mime(mime) = &resource.kind { + Some(format!("data:{};base64,{}", mime, &resource.content)) + } else { + None + } + }) + } + + /// Gets the resource associated with `resource_ident`, respecting aliases if necessary. 
+ fn get_internal_resource(&self, resource_ident: &str) -> Option<&Resource> { + let resource = if let Some(resource) = self.resources.get(resource_ident) { + Some(resource) + } else if let Some(canonical_name) = self.aliases.get(resource_ident) { + self.resources.get(canonical_name) + } else { + None + }; + + resource + } +} + +/// Describes failure cases when preparing [`Resource`]s to be used for adblocking. +#[derive(Debug, Error, PartialEq)] +pub enum AddResourceError { + #[error("invalid base64 content")] + InvalidBase64Content, + #[error("invalid utf-8 content")] + InvalidUtf8Content, + #[error("resource name already added")] + NameAlreadyAdded, + #[error("fn/javascript mime type is not yet supported")] + FnJavascriptNotSupported, + #[error("resource content type does not support dependencies")] + ContentTypeDoesNotSupportDependencies, +} + +impl From for AddResourceError { + fn from(_: base64::DecodeError) -> Self { + AddResourceError::InvalidBase64Content + } +} + +impl From for AddResourceError { + fn from(_: std::string::FromUtf8Error) -> Self { + AddResourceError::InvalidUtf8Content + } +} + +/// Describes failure cases when attempting to retrieve a resource for scriptlet injection. 
+#[derive(Debug, Error, PartialEq)] +pub enum ScriptletResourceError { + #[error("no scriptlet has the provided name")] + NoMatchingScriptlet, + #[error("no scriptlet name was provided")] + MissingScriptletName, + #[error("object syntax for scriptlet arguments is unsupported")] + ScriptletArgObjectSyntaxUnsupported, + #[error("scriptlet content was corrupted")] + CorruptScriptletContent, + #[error("resource content type cannot be used for a scriptlet injection")] + ContentTypeNotInjectable, + #[error("filter rule is not authorized to inject the intended scriptlet")] + InsufficientPermissions, +} + +impl From for ScriptletResourceError { + fn from(_: base64::DecodeError) -> Self { + Self::CorruptScriptletContent + } +} + +impl From for ScriptletResourceError { + fn from(_: std::string::FromUtf8Error) -> Self { + Self::CorruptScriptletContent + } +} + +static TEMPLATE_ARGUMENT_RE: [Lazy; 9] = [ + Lazy::new(|| template_argument_regex(1)), + Lazy::new(|| template_argument_regex(2)), + Lazy::new(|| template_argument_regex(3)), + Lazy::new(|| template_argument_regex(4)), + Lazy::new(|| template_argument_regex(5)), + Lazy::new(|| template_argument_regex(6)), + Lazy::new(|| template_argument_regex(7)), + Lazy::new(|| template_argument_regex(8)), + Lazy::new(|| template_argument_regex(9)), +]; + +fn template_argument_regex(i: usize) -> Regex { + Regex::new(&format!(r"\{{\{{{}\}}\}}", i)).unwrap() +} + +/// Omit the 0th element of `args` (the scriptlet name) when calling this method. +fn patch_template_scriptlet(mut template: String, args: &[impl AsRef]) -> String { + // `regex` treats `$` as a special character. Instead, `$$` is interpreted as a literal `$` + // character. + args.iter().enumerate().for_each(|(i, arg)| { + template = TEMPLATE_ARGUMENT_RE[i] + .replace(&template, arg.as_ref().replace('$', "$$")) + .to_string(); + }); + template +} + +/// Scriptlet injections must be JS resources. 
However, the `.js` extension may need to be added as +/// a canonicalization step, since it can be omitted in filter rules. +fn with_js_extension(scriptlet_name: &str) -> String { + if scriptlet_name.ends_with(".js") { + scriptlet_name.to_string() + } else { + format!("{}.js", scriptlet_name) + } +} + +/// Parses the inner contents of a `+js(...)` block into a Vec of its comma-delimited elements. +/// +/// A literal comma is produced by the '\,' pattern. Otherwise, all '\', '"', and ''' characters +/// are erased in the resulting arguments. +fn parse_scriptlet_args(args: &str) -> Vec { + static ESCAPE_SCRIPTLET_ARG_RE: Lazy = Lazy::new(|| Regex::new(r#"[\\'"]"#).unwrap()); + + // Guarantee that the last character is not a backslash + let args = args.trim_end_matches('\\'); + + let mut args_vec = vec![]; + if args.trim().is_empty() { + return args_vec; + } + + let mut after_last_delim = 0; + + let comma_positions = memchr::memchr_iter(b',', args.as_bytes()); + let mut continuation = None; + for comma_pos in comma_positions.chain(std::iter::once(args.len())) { + let mut part = &args[after_last_delim..comma_pos]; + let mut is_continuation = false; + + if !part.is_empty() && part.as_bytes()[part.len() - 1] == b'\\' { + part = &part[0..part.len() - 1]; + is_continuation = true; + } + + let mut target = if let Some(s) = continuation.take() { + s + } else { + String::new() + }; + + target += part; + if is_continuation { + target += ","; + continuation = Some(target); + } else { + args_vec.push(ESCAPE_SCRIPTLET_ARG_RE.replace_all(&target, "\\$0").trim().to_string()); + } + + after_last_delim = comma_pos + 1; + } + + args_vec +} + +#[cfg(test)] +mod redirect_storage_tests { + use super::*; + + #[test] + fn get_resource_by_name() { + let mut storage = ResourceStorage::default(); + storage + .add_resource( + Resource::simple("name.js", MimeType::ApplicationJavascript, "resource data"), + ) + .unwrap(); + + assert_eq!( + storage.get_redirect_resource("name.js"), + 
Some(format!("data:application/javascript;base64,{}", base64::encode("resource data"))), + ); + } + + #[test] + fn get_resource_by_alias() { + let mut storage = ResourceStorage::default(); + let mut r = Resource::simple("name.js", MimeType::ApplicationJavascript, "resource data"); + r.aliases.push("alias.js".to_string()); + storage + .add_resource(r) + .unwrap(); + + assert_eq!( + storage.get_redirect_resource("alias.js"), + Some(format!("data:application/javascript;base64,{}", base64::encode("resource data"))), + ); + } + + #[test] + fn permissions() { + let mut storage = ResourceStorage::default(); + let mut r = Resource::simple("name.js", MimeType::ApplicationJavascript, "resource data"); + r.aliases.push("alias.js".to_string()); + r.permission = PermissionMask::from_bits(0b00000001); + storage + .add_resource(r) + .unwrap(); + + assert_eq!( + storage.get_redirect_resource("name.js"), + None, + ); + assert_eq!( + storage.get_redirect_resource("alias.js"), + None, + ); + } +} + +#[cfg(test)] +mod scriptlet_storage_tests { + use super::*; + + #[test] + fn parse_argslist() { + let args = parse_scriptlet_args("scriptlet, hello world, foobar"); + assert_eq!(args, vec!["scriptlet", "hello world", "foobar"]); + } + + #[test] + fn parse_argslist_noargs() { + let args = parse_scriptlet_args("scriptlet"); + assert_eq!(args, vec!["scriptlet"]); + } + + #[test] + fn parse_argslist_empty() { + let args = parse_scriptlet_args(""); + assert!(args.is_empty()); + } + + #[test] + fn parse_argslist_commas() { + let args = parse_scriptlet_args("scriptletname, one\\, two\\, three, four"); + assert_eq!(args, vec!["scriptletname", "one, two, three", "four"]); + } + + #[test] + fn parse_argslist_badchars() { + let args = parse_scriptlet_args( + r##"scriptlet, "; window.location.href = bad.com; , '; alert("you're\, hacked"); , \u\r\l(bad.com) "##, + ); + assert_eq!( + args, + vec![ + r#"scriptlet"#, + r#"\"; window.location.href = bad.com;"#, + r#"\'; alert(\"you\'re, hacked\");"#, + 
r#"\\u\\r\\l(bad.com)"# + ] + ); + } + + + #[test] + fn get_patched_scriptlets() { + let resources = ResourceStorage::from_resources([ + Resource { + name: "greet.js".to_string(), + aliases: vec![], + kind: ResourceType::Template, + content: base64::encode("console.log('Hello {{1}}, my name is {{2}}')"), + dependencies: vec![], + permission: Default::default(), + }, + Resource { + name: "alert.js".to_owned(), + aliases: vec![], + kind: ResourceType::Template, + content: base64::encode("alert('{{1}}')"), + dependencies: vec![], + permission: Default::default(), + }, + Resource { + name: "blocktimer.js".to_owned(), + aliases: vec![], + kind: ResourceType::Template, + content: base64::encode("setTimeout(blockAds, {{1}})"), + dependencies: vec![], + permission: Default::default(), + }, + Resource { + name: "null.js".to_owned(), + aliases: vec![], + kind: ResourceType::Template, + content: base64::encode("(()=>{})()"), + dependencies: vec![], + permission: Default::default(), + }, + Resource { + name: "set-local-storage-item.js".to_owned(), + aliases: vec![], + kind: ResourceType::Template, + content: base64::encode(r#"{{1}} that dollar signs in {{2}} are untouched"#), + dependencies: vec![], + permission: Default::default(), + }, + ]); + + assert_eq!( + resources.get_scriptlet_resource("greet, world, adblock-rust", Default::default()), + Ok("console.log('Hello world, my name is adblock-rust')".into()) + ); + assert_eq!( + resources.get_scriptlet_resource("alert, All systems are go!! 
", Default::default()), + Ok("alert('All systems are go!!')".into()) + ); + assert_eq!( + resources.get_scriptlet_resource("alert, Uh oh\\, check the logs...", Default::default()), + Ok("alert('Uh oh, check the logs...')".into()) + ); + assert_eq!( + resources.get_scriptlet_resource(r#"alert, this has "quotes""#, Default::default()), + Ok(r#"alert('this has \"quotes\"')"#.into()) + ); + assert_eq!( + resources.get_scriptlet_resource("blocktimer, 3000", Default::default()), + Ok("setTimeout(blockAds, 3000)".into()) + ); + assert_eq!(resources.get_scriptlet_resource("null", Default::default()), Ok("(()=>{})()".into())); + assert_eq!( + resources.get_scriptlet_resource("null, null", Default::default()), + Ok("(()=>{})()".into()) + ); + assert_eq!( + resources.get_scriptlet_resource("greet, everybody", Default::default()), + Ok("console.log('Hello everybody, my name is {{2}}')".into()) + ); + + assert_eq!( + resources.get_scriptlet_resource("unit-testing", Default::default()), + Err(ScriptletResourceError::NoMatchingScriptlet) + ); + assert_eq!( + resources.get_scriptlet_resource("", Default::default()), + Err(ScriptletResourceError::MissingScriptletName) + ); + + assert_eq!( + resources.get_scriptlet_resource("set-local-storage-item, Test, $remove$", Default::default()), + Ok("Test that dollar signs in $remove$ are untouched".into()), + ); + } + + #[test] + fn parse_template_file_format() { + let resources = ResourceStorage::from_resources([ + Resource { + name: "abort-current-inline-script.js".into(), + aliases: vec!["acis.js".into()], + kind: ResourceType::Mime(MimeType::ApplicationJavascript), + content: base64::encode("(function() {alert(\"hi\");})();"), + dependencies: vec![], + permission: Default::default(), + }, + Resource { + name: "abort-on-property-read.js".into(), + aliases: vec!["aopr.js".into()], + kind: ResourceType::Template, + content: base64::encode("(function() {confirm(\"Do you want to {{1}}?\");})();"), + dependencies: vec![], + permission: 
Default::default(), + }, + Resource { + name: "googletagservices_gpt.js".into(), + aliases: vec!["googletagservices.com/gpt.js".into(), "googletagservices-gpt".into()], + kind: ResourceType::Template, + content: base64::encode("function(a1 = '', a2 = '') {console.log(a1, a2)}"), + dependencies: vec![], + permission: Default::default(), + }, + ]); + + assert_eq!( + resources.get_scriptlet_resource("aopr, code", Default::default()), + Ok("(function() {confirm(\"Do you want to code?\");})();".to_owned()), + ); + + assert_eq!( + resources.get_scriptlet_resource("abort-on-property-read, write tests", Default::default()), + Ok("(function() {confirm(\"Do you want to write tests?\");})();".to_owned()), + ); + + assert_eq!( + resources.get_scriptlet_resource("abort-on-property-read.js, block advertisements", Default::default()), + Ok("(function() {confirm(\"Do you want to block advertisements?\");})();".to_owned()), + ); + + assert_eq!( + resources.get_scriptlet_resource("acis", Default::default()), + Ok("(function() {alert(\"hi\");})();".to_owned()), + ); + + assert_eq!( + resources.get_scriptlet_resource("acis.js", Default::default()), + Ok("(function() {alert(\"hi\");})();".to_owned()), + ); + + assert_eq!( + resources.get_scriptlet_resource("googletagservices_gpt.js", Default::default()), + Ok("(function(a1 = '', a2 = '') {console.log(a1, a2)})()".to_owned()), + ); + + assert_eq!( + resources.get_scriptlet_resource("googletagservices_gpt, test1", Default::default()), + Ok("(function(a1 = '', a2 = '') {console.log(a1, a2)})('test1')".to_owned()), + ); + + assert_eq!( + resources.get_scriptlet_resource("googletagservices.com/gpt, test1, test2", Default::default()), + Ok("(function(a1 = '', a2 = '') {console.log(a1, a2)})('test1', 'test2')".to_owned()), + ); + + assert_eq!( + resources.get_scriptlet_resource(r#"googletagservices.com/gpt.js, t"es't1, $te\st2$"#, Default::default()), + Ok(r#"(function(a1 = '', a2 = '') {console.log(a1, a2)})('t\"es\'t1', 
'$te\\st2$')"#.to_owned()), + ); + + // The alias does not have a `.js` extension, so it cannot be used for a scriptlet + // injection (only as a redirect resource). + assert_eq!( + resources.get_scriptlet_resource(r#"googletagservices-gpt, t"es't1, te\st2"#, Default::default()), + Err(ScriptletResourceError::NoMatchingScriptlet), + ); + + // Object-style injection + assert_eq!( + resources.get_scriptlet_resource(r#"googletagservices.com/gpt, { "test": true }"#, Default::default()), + Err(ScriptletResourceError::ScriptletArgObjectSyntaxUnsupported), + ); + } + + #[test] + fn permissions() { + const PERM0: PermissionMask = PermissionMask::from_bits(0b00000001); + const PERM1: PermissionMask = PermissionMask::from_bits(0b00000010); + const PERM10: PermissionMask = PermissionMask::from_bits(0b00000011); + let resources = ResourceStorage::from_resources([ + Resource::simple("default-perms.js", MimeType::ApplicationJavascript, "default-perms"), + Resource { + name: "perm0.js".into(), + aliases: vec!["0.js".to_string()], + kind: ResourceType::Mime(MimeType::ApplicationJavascript), + content: base64::encode("perm0"), + dependencies: vec![], + permission: PERM0, + }, + Resource { + name: "perm1.js".into(), + aliases: vec!["1.js".to_string()], + kind: ResourceType::Mime(MimeType::ApplicationJavascript), + content: base64::encode("perm1"), + dependencies: vec![], + permission: PERM1, + }, + Resource { + name: "perm10.js".into(), + aliases: vec!["10.js".to_string()], + kind: ResourceType::Mime(MimeType::ApplicationJavascript), + content: base64::encode("perm10"), + dependencies: vec![], + permission: PERM10, + }, + ]); + + fn test_perm(resources: &ResourceStorage, perm: PermissionMask, expect_ok: &[&str], expect_fail: &[&str]) { + for ident in expect_ok { + if ident.len() > 2 { + assert_eq!( + resources.get_scriptlet_resource(ident, perm), + Ok(ident.to_string()), + ); + } else { + assert_eq!( + resources.get_scriptlet_resource(ident, perm), + Ok(format!("perm{}", ident)), + 
); + } + } + + for ident in expect_fail { + assert_eq!( + resources.get_scriptlet_resource(ident, perm), + Err(ScriptletResourceError::InsufficientPermissions), + ); + } + } + + test_perm(&resources, Default::default(), &["default-perms"], &["perm0", "perm1", "perm10", "0", "1", "10"]); + test_perm(&resources, PERM0, &["default-perms", "perm0", "0"], &["perm1", "perm10", "1", "10"]); + test_perm(&resources, PERM1, &["default-perms", "perm1", "1"], &["perm0", "perm10", "0", "10"]); + test_perm(&resources, PERM10, &["default-perms", "perm0", "perm1", "perm10", "0", "1", "10"], &[]); + } +} diff --git a/third_party/rust/adblock/v0_7/crate/src/url_parser/mod.rs b/third_party/rust/adblock/v0_8/crate/src/url_parser/mod.rs similarity index 100% rename from third_party/rust/adblock/v0_7/crate/src/url_parser/mod.rs rename to third_party/rust/adblock/v0_8/crate/src/url_parser/mod.rs diff --git a/third_party/rust/adblock/v0_7/crate/src/url_parser/parser.rs b/third_party/rust/adblock/v0_8/crate/src/url_parser/parser.rs similarity index 100% rename from third_party/rust/adblock/v0_7/crate/src/url_parser/parser.rs rename to third_party/rust/adblock/v0_8/crate/src/url_parser/parser.rs diff --git a/third_party/rust/adblock/v0_7/crate/src/url_parser/parser_full.rs b/third_party/rust/adblock/v0_8/crate/src/url_parser/parser_full.rs similarity index 100% rename from third_party/rust/adblock/v0_7/crate/src/url_parser/parser_full.rs rename to third_party/rust/adblock/v0_8/crate/src/url_parser/parser_full.rs diff --git a/third_party/rust/adblock/v0_7/crate/src/url_parser/parser_regex.rs b/third_party/rust/adblock/v0_8/crate/src/url_parser/parser_regex.rs similarity index 100% rename from third_party/rust/adblock/v0_7/crate/src/url_parser/parser_regex.rs rename to third_party/rust/adblock/v0_8/crate/src/url_parser/parser_regex.rs diff --git a/third_party/rust/adblock/v0_7/crate/src/utils.rs b/third_party/rust/adblock/v0_8/crate/src/utils.rs similarity index 68% rename from 
third_party/rust/adblock/v0_7/crate/src/utils.rs rename to third_party/rust/adblock/v0_8/crate/src/utils.rs index 8c1713539033..55a3f312c6fc 100644 --- a/third_party/rust/adblock/v0_7/crate/src/utils.rs +++ b/third_party/rust/adblock/v0_8/crate/src/utils.rs @@ -5,10 +5,6 @@ use seahash::hash; #[cfg(target_pointer_width = "32")] use seahash::reference::hash; -#[cfg(not(target_arch = "wasm32"))] -use std::fs::File; -#[cfg(not(target_arch = "wasm32"))] -use std::io::{BufRead, BufReader}; pub type Hash = u64; @@ -70,41 +66,6 @@ fn fast_tokenizer_no_regex( } } -fn fast_tokenizer( - pattern: &str, - is_allowed_code: &dyn Fn(char) -> bool, - skip_first_token: bool, - skip_last_token: bool, - tokens_buffer: &mut Vec, -) { - let mut inside: bool = false; - let mut start = 0; - let chars = pattern.char_indices(); - - for (i, c) in chars { - if tokens_buffer.len() >= TOKENS_MAX { - break; - } - if is_allowed_code(c) { - if !inside { - inside = true; - start = i; - } - } else if inside { - inside = false; - if !skip_first_token || start != 0 { - let hash = fast_hash(&pattern[start..i]); - tokens_buffer.push(hash); - } - } - } - - if !skip_last_token && inside { - let hash = fast_hash(&pattern[start..]); - tokens_buffer.push(hash); - } -} - pub(crate) fn tokenize_pooled(pattern: &str, tokens_buffer: &mut Vec) { fast_tokenizer_no_regex(pattern, &is_allowed_filter, false, false, tokens_buffer); } @@ -137,50 +98,10 @@ pub(crate) fn tokenize_filter( tokens_buffer } -fn compact_tokens(tokens: &mut Vec) { - tokens.sort_unstable(); - tokens.dedup(); -} - pub(crate) fn bin_lookup(arr: &[T], elt: T) -> bool { arr.binary_search(&elt).is_ok() } -const EXPECTED_RULES: usize = 75000; -#[cfg(not(target_arch = "wasm32"))] -pub fn read_file_lines(filename: &str) -> Vec { - let f = File::open(filename).unwrap_or_else(|_| panic!("File {} not found", filename)); - let reader = BufReader::new(f); - let mut rules: Vec = Vec::with_capacity(EXPECTED_RULES); - for line in reader.lines() { - let l = 
line.unwrap(); - rules.push(l); - } - rules.shrink_to_fit(); - rules -} -#[cfg(not(target_arch = "wasm32"))] -pub fn rules_from_lists(lists: &[String]) -> Vec { - let mut rules: Vec = Vec::with_capacity(EXPECTED_RULES); - for filename in lists { - let mut list_rules = read_file_lines(filename); - rules.append(&mut list_rules); - } - rules.shrink_to_fit(); - rules -} - -pub(crate) fn is_eof_error(e: &rmp_serde_legacy::decode::Error) -> bool { - if let rmp_serde_legacy::decode::Error::InvalidMarkerRead(e) = e { - if e.kind() == std::io::ErrorKind::UnexpectedEof - && format!("{}", e) == "failed to fill whole buffer" - { - return true; - } - } - false -} - #[cfg(test)] mod tests { use super::*; @@ -275,13 +196,13 @@ mod tests { #[test] fn bin_lookup_works() { - assert_eq!(bin_lookup(&vec![], 42), false); - assert_eq!(bin_lookup(&vec![42], 42), true); - assert_eq!(bin_lookup(&vec![1, 2, 3, 4, 42], 42), true); - assert_eq!(bin_lookup(&vec![1, 2, 3, 4, 42], 1), true); - assert_eq!(bin_lookup(&vec![1, 2, 3, 4, 42], 3), true); - assert_eq!(bin_lookup(&vec![1, 2, 3, 4, 42], 43), false); - assert_eq!(bin_lookup(&vec![1, 2, 3, 4, 42], 0), false); - assert_eq!(bin_lookup(&vec![1, 2, 3, 4, 42], 5), false); + assert_eq!(bin_lookup(&[], 42), false); + assert_eq!(bin_lookup(&[42], 42), true); + assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 42), true); + assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 1), true); + assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 3), true); + assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 43), false); + assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 0), false); + assert_eq!(bin_lookup(&[1, 2, 3, 4, 42], 5), false); } } diff --git a/third_party/rust/flate2/v1/BUILD.gn b/third_party/rust/flate2/v1/BUILD.gn deleted file mode 100644 index 3b9a6d6b825f..000000000000 --- a/third_party/rust/flate2/v1/BUILD.gn +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright 2023 The Chromium Authors -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. 
- -import("//build/rust/cargo_crate.gni") - -cargo_crate("lib") { - crate_name = "flate2" - epoch = "1" - crate_type = "rlib" - crate_root = "crate/src/lib.rs" - - # Unit tests skipped. Generate with --with-tests to include them. - build_native_rust_unit_tests = false - sources = [ "crate/src/lib.rs" ] - edition = "2018" - cargo_pkg_version = "1.0.25" - cargo_pkg_authors = "Alex Crichton , Josh Triplett " - cargo_pkg_name = "flate2" - cargo_pkg_description = "DEFLATE compression and decompression exposed as Read/BufRead/Write streams. Supports miniz_oxide and multiple zlib implementations. Supports zlib, gzip, and raw deflate streams." - library_configs -= [ "//build/config/compiler:chromium_code" ] - library_configs += [ "//build/config/compiler:no_chromium_code" ] - executable_configs -= [ "//build/config/compiler:chromium_code" ] - executable_configs += [ "//build/config/compiler:no_chromium_code" ] - deps = [ - "//brave/third_party/rust/crc32fast/v1:lib", - "//brave/third_party/rust/miniz_oxide/v0_6:lib", - ] - features = [ - "miniz_oxide", - "rust_backend", - ] -} diff --git a/third_party/rust/flate2/v1/README.chromium b/third_party/rust/flate2/v1/README.chromium deleted file mode 100644 index 051581301d19..000000000000 --- a/third_party/rust/flate2/v1/README.chromium +++ /dev/null @@ -1,10 +0,0 @@ -Name: flate2 -URL: https://crates.io/crates/flate2 -Description: DEFLATE compression and decompression exposed as Read/BufRead/Write streams. -Supports miniz_oxide and multiple zlib implementations. Supports zlib, gzip, -and raw deflate streams. 
- -Version: 1.0.25 -Security Critical: yes -License: Apache 2.0 -Revision: 8431d9e0c0fdaea16c4643c723631223802b2c86 diff --git a/third_party/rust/flate2/v1/crate/.cargo_vcs_info.json b/third_party/rust/flate2/v1/crate/.cargo_vcs_info.json deleted file mode 100644 index bfa7c9d7fa05..000000000000 --- a/third_party/rust/flate2/v1/crate/.cargo_vcs_info.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "git": { - "sha1": "8431d9e0c0fdaea16c4643c723631223802b2c86" - }, - "path_in_vcs": "" -} \ No newline at end of file diff --git a/third_party/rust/flate2/v1/crate/.gitattributes b/third_party/rust/flate2/v1/crate/.gitattributes deleted file mode 100644 index 2c09fb782837..000000000000 --- a/third_party/rust/flate2/v1/crate/.gitattributes +++ /dev/null @@ -1,20 +0,0 @@ -# automatically handle text files -* text=auto - -# text files -*.md text -*.txt text - -# source code files -*.rs text -*.c text - -# configuration files -*.yml text -*.toml text - -# compressed files -*.gz binary - -# no end-of-line normalization should take place for integration test text files -tests/*.txt -text diff --git a/third_party/rust/flate2/v1/crate/.github/workflows/main.yml b/third_party/rust/flate2/v1/crate/.github/workflows/main.yml deleted file mode 100644 index 9cdaa501426b..000000000000 --- a/third_party/rust/flate2/v1/crate/.github/workflows/main.yml +++ /dev/null @@ -1,67 +0,0 @@ -name: CI -on: [push, pull_request] - -jobs: - test: - name: Test - runs-on: ${{ matrix.os }} - strategy: - matrix: - # I don't really understand the build matrix here... 
- build: [stable, beta, nightly, macos, windows, mingw] - include: - - build: stable - os: ubuntu-latest - rust: stable - - build: beta - os: ubuntu-latest - rust: beta - - build: nightly - os: ubuntu-latest - rust: nightly - - build: macos - os: macos-latest - rust: stable - - build: windows - os: windows-2019 - rust: stable - - build: mingw - os: windows-2019 - rust: stable-x86_64-gnu - steps: - - uses: actions/checkout@master - - name: Install Rust (rustup) - run: rustup update ${{ matrix.rust }} --no-self-update && rustup default ${{ matrix.rust }} - shell: bash - - run: cargo build - - run: rustdoc --test README.md -L target/debug/deps --extern flate2=target/debug/libflate2.rlib --edition=2018 - - run: cargo test - - run: cargo test --features zlib - - run: cargo test --features zlib --no-default-features - - run: cargo test --features zlib-ng-compat --no-default-features - if: matrix.build != 'mingw' - - run: cargo test --features zlib-ng --no-default-features - if: matrix.build != 'mingw' - - run: cargo test --features cloudflare_zlib --no-default-features - if: matrix.build != 'mingw' - - rustfmt: - name: Rustfmt - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@master - - name: Install Rust - run: rustup update stable && rustup default stable && rustup component add rustfmt - - run: cargo fmt -- --check - - wasm: - name: WebAssembly - runs-on: ubuntu-latest - strategy: - matrix: - target: [wasm32-unknown-unknown, wasm32-wasi] - steps: - - uses: actions/checkout@master - - name: Install Rust - run: rustup update stable && rustup default stable && rustup target add ${{ matrix.target }} - - run: cargo build --target ${{ matrix.target }} diff --git a/third_party/rust/flate2/v1/crate/.gitignore b/third_party/rust/flate2/v1/crate/.gitignore deleted file mode 100644 index 3a4ef731c599..000000000000 --- a/third_party/rust/flate2/v1/crate/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -target -Cargo.lock -examples/*.gz -.idea diff --git 
a/third_party/rust/flate2/v1/crate/Cargo.toml b/third_party/rust/flate2/v1/crate/Cargo.toml deleted file mode 100644 index 68516e5e752e..000000000000 --- a/third_party/rust/flate2/v1/crate/Cargo.toml +++ /dev/null @@ -1,96 +0,0 @@ -# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO -# -# When uploading crates to the registry Cargo will automatically -# "normalize" Cargo.toml files for maximal compatibility -# with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g., crates.io) dependencies. -# -# If you are reading this file be aware that the original Cargo.toml -# will likely look very different (and much more reasonable). -# See Cargo.toml.orig for the original contents. - -[package] -edition = "2018" -name = "flate2" -version = "1.0.25" -authors = [ - "Alex Crichton ", - "Josh Triplett ", -] -description = """ -DEFLATE compression and decompression exposed as Read/BufRead/Write streams. -Supports miniz_oxide and multiple zlib implementations. Supports zlib, gzip, -and raw deflate streams. 
-""" -homepage = "https://github.com/rust-lang/flate2-rs" -documentation = "https://docs.rs/flate2" -readme = "README.md" -keywords = [ - "gzip", - "deflate", - "zlib", - "zlib-ng", - "encoding", -] -categories = [ - "compression", - "api-bindings", -] -license = "MIT OR Apache-2.0" -repository = "https://github.com/rust-lang/flate2-rs" - -[dependencies.cloudflare-zlib-sys] -version = "0.3.0" -optional = true - -[dependencies.crc32fast] -version = "1.2.0" - -[dependencies.libz-ng-sys] -version = "1.1.8" -optional = true - -[dependencies.libz-sys] -version = "1.1.8" -optional = true -default-features = false - -[dependencies.miniz_oxide] -version = "0.6.0" -features = ["with-alloc"] -optional = true -default-features = false - -[dev-dependencies.quickcheck] -version = "1.0" -default-features = false - -[dev-dependencies.rand] -version = "0.8" - -[features] -any_zlib = [] -cloudflare_zlib = [ - "any_zlib", - "cloudflare-zlib-sys", -] -default = ["rust_backend"] -miniz-sys = ["rust_backend"] -rust_backend = ["miniz_oxide"] -zlib = [ - "any_zlib", - "libz-sys", -] -zlib-ng = [ - "any_zlib", - "libz-ng-sys", -] -zlib-ng-compat = [ - "zlib", - "libz-sys/zlib-ng", -] - -[target."cfg(all(target_arch = \"wasm32\", not(target_os = \"emscripten\")))".dependencies.miniz_oxide] -version = "0.6.0" -features = ["with-alloc"] -default-features = false diff --git a/third_party/rust/flate2/v1/crate/Cargo.toml.orig b/third_party/rust/flate2/v1/crate/Cargo.toml.orig deleted file mode 100644 index aa9a4fc7da94..000000000000 --- a/third_party/rust/flate2/v1/crate/Cargo.toml.orig +++ /dev/null @@ -1,41 +0,0 @@ -[package] -name = "flate2" -authors = ["Alex Crichton ", "Josh Triplett "] -version = "1.0.25" -edition = "2018" -license = "MIT OR Apache-2.0" -readme = "README.md" -keywords = ["gzip", "deflate", "zlib", "zlib-ng", "encoding"] -categories = ["compression", "api-bindings"] -repository = "https://github.com/rust-lang/flate2-rs" -homepage = "https://github.com/rust-lang/flate2-rs" 
-documentation = "https://docs.rs/flate2" -description = """ -DEFLATE compression and decompression exposed as Read/BufRead/Write streams. -Supports miniz_oxide and multiple zlib implementations. Supports zlib, gzip, -and raw deflate streams. -""" - -[dependencies] -libz-sys = { version = "1.1.8", optional = true, default-features = false } -libz-ng-sys = { version = "1.1.8", optional = true } -cloudflare-zlib-sys = { version = "0.3.0", optional = true } -miniz_oxide = { version = "0.6.0", optional = true, default-features = false, features = ["with-alloc"] } -crc32fast = "1.2.0" - -[target.'cfg(all(target_arch = "wasm32", not(target_os = "emscripten")))'.dependencies] -miniz_oxide = { version = "0.6.0", default-features = false, features = ["with-alloc"] } - -[dev-dependencies] -rand = "0.8" -quickcheck = { version = "1.0", default-features = false } - -[features] -default = ["rust_backend"] -any_zlib = [] # note: this is not a real user-facing feature -zlib = ["any_zlib", "libz-sys"] -zlib-ng-compat = ["zlib", "libz-sys/zlib-ng"] -zlib-ng = ["any_zlib", "libz-ng-sys"] -cloudflare_zlib = ["any_zlib", "cloudflare-zlib-sys"] -rust_backend = ["miniz_oxide"] -miniz-sys = ["rust_backend"] # For backwards compatibility diff --git a/third_party/rust/flate2/v1/crate/LICENSE-APACHE b/third_party/rust/flate2/v1/crate/LICENSE-APACHE deleted file mode 100644 index 16fe87b06e80..000000000000 --- a/third_party/rust/flate2/v1/crate/LICENSE-APACHE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the 
following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - -Copyright [yyyy] [name of copyright owner] - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
diff --git a/third_party/rust/flate2/v1/crate/LICENSE-MIT b/third_party/rust/flate2/v1/crate/LICENSE-MIT deleted file mode 100644 index 39e0ed660215..000000000000 --- a/third_party/rust/flate2/v1/crate/LICENSE-MIT +++ /dev/null @@ -1,25 +0,0 @@ -Copyright (c) 2014 Alex Crichton - -Permission is hereby granted, free of charge, to any -person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the -Software without restriction, including without -limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software -is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice -shall be included in all copies or substantial portions -of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. diff --git a/third_party/rust/flate2/v1/crate/README.md b/third_party/rust/flate2/v1/crate/README.md deleted file mode 100644 index d4763a458ad6..000000000000 --- a/third_party/rust/flate2/v1/crate/README.md +++ /dev/null @@ -1,114 +0,0 @@ -# flate2 - -[![Crates.io](https://img.shields.io/crates/v/flate2.svg?maxAge=2592000)](https://crates.io/crates/flate2) -[![Documentation](https://docs.rs/flate2/badge.svg)](https://docs.rs/flate2) - -A streaming compression/decompression library DEFLATE-based streams in Rust. - -This crate by default uses the `miniz_oxide` crate, a port of `miniz.c` to pure -Rust. 
This crate also supports other [backends](#Backends), such as the widely -available zlib library or the high-performance zlib-ng library. - -Supported formats: - -* deflate -* zlib -* gzip - -```toml -# Cargo.toml -[dependencies] -flate2 = "1.0" -``` - -## Compression - -```rust -use std::io::prelude::*; -use flate2::Compression; -use flate2::write::ZlibEncoder; - -fn main() { - let mut e = ZlibEncoder::new(Vec::new(), Compression::default()); - e.write_all(b"foo"); - e.write_all(b"bar"); - let compressed_bytes = e.finish(); -} -``` - -## Decompression - -```rust,no_run -use std::io::prelude::*; -use flate2::read::GzDecoder; - -fn main() { - let mut d = GzDecoder::new("...".as_bytes()); - let mut s = String::new(); - d.read_to_string(&mut s).unwrap(); - println!("{}", s); -} -``` - -## Backends - -The default `miniz_oxide` backend has the advantage of being pure Rust. If you -want maximum performance, you can use the zlib-ng C library: - -```toml -[dependencies] -flate2 = { version = "1.0.17", features = ["zlib-ng"], default-features = false } -``` - -Note that the `"zlib-ng"` feature works even if some other part of your crate -graph depends on zlib. - -However, if you're already using another C or Rust library that depends on -zlib, and you want to avoid including both zlib and zlib-ng, you can use that -for Rust code as well: - -```toml -[dependencies] -flate2 = { version = "1.0.17", features = ["zlib"], default-features = false } -``` - -Or, if you have C or Rust code that depends on zlib and you want to use zlib-ng -via libz-sys in zlib-compat mode, use: - -```toml -[dependencies] -flate2 = { version = "1.0.17", features = ["zlib-ng-compat"], default-features = false } -``` - -Note that when using the `"zlib-ng-compat"` feature, if any crate in your -dependency graph explicitly requests stock zlib, or uses libz-sys directly -without `default-features = false`, you'll get stock zlib rather than zlib-ng. 
-See [the libz-sys -README](https://github.com/rust-lang/libz-sys/blob/main/README.md) for details. -To avoid that, use the `"zlib-ng"` feature instead. - -For compatibility with previous versions of `flate2`, the Cloudflare optimized -version of zlib is available, via the `cloudflare_zlib` feature. It's not as -fast as zlib-ng, but it's faster than stock zlib. It requires an x86-64 CPU with -SSE 4.2 or ARM64 with NEON & CRC. It does not support 32-bit CPUs at all and is -incompatible with mingw. For more information check the [crate -documentation](https://crates.io/crates/cloudflare-zlib-sys). Note that -`cloudflare_zlib` will cause breakage if any other crate in your crate graph -uses another version of zlib/libz. - -# License - -This project is licensed under either of - - * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or - https://www.apache.org/licenses/LICENSE-2.0) - * MIT license ([LICENSE-MIT](LICENSE-MIT) or - https://opensource.org/licenses/MIT) - -at your option. - -### Contribution - -Unless you explicitly state otherwise, any contribution intentionally submitted -for inclusion in this project by you, as defined in the Apache-2.0 license, -shall be dual licensed as above, without any additional terms or conditions. 
diff --git a/third_party/rust/flate2/v1/crate/examples/compress_file.rs b/third_party/rust/flate2/v1/crate/examples/compress_file.rs deleted file mode 100644 index 39ed8eed4e8d..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/compress_file.rs +++ /dev/null @@ -1,28 +0,0 @@ -extern crate flate2; - -use flate2::write::GzEncoder; -use flate2::Compression; -use std::env::args; -use std::fs::File; -use std::io::copy; -use std::io::BufReader; -use std::time::Instant; - -fn main() { - if args().len() != 3 { - eprintln!("Usage: ./compress_file `source` `target`"); - return; - } - let mut input = BufReader::new(File::open(args().nth(1).unwrap()).unwrap()); - let output = File::create(args().nth(2).unwrap()).unwrap(); - let mut encoder = GzEncoder::new(output, Compression::default()); - let start = Instant::now(); - copy(&mut input, &mut encoder).unwrap(); - let output = encoder.finish().unwrap(); - println!( - "Source len: {:?}", - input.get_ref().metadata().unwrap().len() - ); - println!("Target len: {:?}", output.metadata().unwrap().len()); - println!("Elapsed: {:?}", start.elapsed()); -} diff --git a/third_party/rust/flate2/v1/crate/examples/deflatedecoder-bufread.rs b/third_party/rust/flate2/v1/crate/examples/deflatedecoder-bufread.rs deleted file mode 100644 index 7d1cb6fd90bf..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/deflatedecoder-bufread.rs +++ /dev/null @@ -1,24 +0,0 @@ -extern crate flate2; - -use flate2::bufread::DeflateDecoder; -use flate2::write::DeflateEncoder; -use flate2::Compression; -use std::io; -use std::io::prelude::*; - -// Compress a sample string and print it after transformation. 
-fn main() { - let mut e = DeflateEncoder::new(Vec::new(), Compression::default()); - e.write_all(b"Hello World").unwrap(); - let bytes = e.finish().unwrap(); - println!("{}", decode_reader(bytes).unwrap()); -} - -// Uncompresses a Deflate Encoded vector of bytes and returns a string or error -// Here &[u8] implements Read -fn decode_reader(bytes: Vec) -> io::Result { - let mut deflater = DeflateDecoder::new(&bytes[..]); - let mut s = String::new(); - deflater.read_to_string(&mut s)?; - Ok(s) -} diff --git a/third_party/rust/flate2/v1/crate/examples/deflatedecoder-read.rs b/third_party/rust/flate2/v1/crate/examples/deflatedecoder-read.rs deleted file mode 100644 index cc5d435be32a..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/deflatedecoder-read.rs +++ /dev/null @@ -1,24 +0,0 @@ -extern crate flate2; - -use flate2::read::DeflateDecoder; -use flate2::write::DeflateEncoder; -use flate2::Compression; -use std::io; -use std::io::prelude::*; - -// Compress a sample string and print it after transformation. 
-fn main() { - let mut e = DeflateEncoder::new(Vec::new(), Compression::default()); - e.write_all(b"Hello World").unwrap(); - let bytes = e.finish().unwrap(); - println!("{}", decode_reader(bytes).unwrap()); -} - -// Uncompresses a Deflate Encoded vector of bytes and returns a string or error -// Here &[u8] implements Read -fn decode_reader(bytes: Vec) -> io::Result { - let mut deflater = DeflateDecoder::new(&bytes[..]); - let mut s = String::new(); - deflater.read_to_string(&mut s)?; - Ok(s) -} diff --git a/third_party/rust/flate2/v1/crate/examples/deflatedecoder-write.rs b/third_party/rust/flate2/v1/crate/examples/deflatedecoder-write.rs deleted file mode 100644 index 276490bac23e..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/deflatedecoder-write.rs +++ /dev/null @@ -1,26 +0,0 @@ -extern crate flate2; - -use flate2::write::DeflateDecoder; -use flate2::write::DeflateEncoder; -use flate2::Compression; -use std::io; -use std::io::prelude::*; - -// Compress a sample string and print it after transformation. 
-fn main() { - let mut e = DeflateEncoder::new(Vec::new(), Compression::default()); - e.write_all(b"Hello World").unwrap(); - let bytes = e.finish().unwrap(); - println!("{}", decode_reader(bytes).unwrap()); -} - -// Uncompresses a Deflate Encoded vector of bytes and returns a string or error -// Here Vec implements Write -fn decode_reader(bytes: Vec) -> io::Result { - let mut writer = Vec::new(); - let mut deflater = DeflateDecoder::new(writer); - deflater.write_all(&bytes[..])?; - writer = deflater.finish()?; - let return_string = String::from_utf8(writer).expect("String parsing error"); - Ok(return_string) -} diff --git a/third_party/rust/flate2/v1/crate/examples/deflateencoder-bufread.rs b/third_party/rust/flate2/v1/crate/examples/deflateencoder-bufread.rs deleted file mode 100644 index 6240f399727a..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/deflateencoder-bufread.rs +++ /dev/null @@ -1,24 +0,0 @@ -extern crate flate2; - -use flate2::bufread::DeflateEncoder; -use flate2::Compression; -use std::fs::File; -use std::io; -use std::io::prelude::*; -use std::io::BufReader; - -// Open file and debug print the contents compressed with Deflate -fn main() { - println!("{:?}", open_hello_world().unwrap()); -} - -// Opens sample file, compresses the contents and returns a Vector or error -// File wrapped in a BufReader implements Bufread -fn open_hello_world() -> io::Result> { - let f = File::open("examples/hello_world.txt")?; - let b = BufReader::new(f); - let mut deflater = DeflateEncoder::new(b, Compression::fast()); - let mut buffer = Vec::new(); - deflater.read_to_end(&mut buffer)?; - Ok(buffer) -} diff --git a/third_party/rust/flate2/v1/crate/examples/deflateencoder-read.rs b/third_party/rust/flate2/v1/crate/examples/deflateencoder-read.rs deleted file mode 100644 index ffb628e48eb9..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/deflateencoder-read.rs +++ /dev/null @@ -1,20 +0,0 @@ -extern crate flate2; - -use 
flate2::read::DeflateEncoder; -use flate2::Compression; -use std::io; -use std::io::prelude::*; - -// Print the Deflate compressed representation of hello world -fn main() { - println!("{:?}", deflateencoder_read_hello_world().unwrap()); -} - -// Return a vector containing the Deflate compressed version of hello world -fn deflateencoder_read_hello_world() -> io::Result> { - let mut result = Vec::new(); - let c = b"hello world"; - let mut deflater = DeflateEncoder::new(&c[..], Compression::fast()); - deflater.read_to_end(&mut result)?; - Ok(result) -} diff --git a/third_party/rust/flate2/v1/crate/examples/deflateencoder-write.rs b/third_party/rust/flate2/v1/crate/examples/deflateencoder-write.rs deleted file mode 100644 index 243b9dfdd0f5..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/deflateencoder-write.rs +++ /dev/null @@ -1,12 +0,0 @@ -extern crate flate2; - -use flate2::write::DeflateEncoder; -use flate2::Compression; -use std::io::prelude::*; - -// Vec implements Write to print the compressed bytes of sample string -fn main() { - let mut e = DeflateEncoder::new(Vec::new(), Compression::default()); - e.write_all(b"Hello World").unwrap(); - println!("{:?}", e.finish().unwrap()); -} diff --git a/third_party/rust/flate2/v1/crate/examples/gzbuilder.rs b/third_party/rust/flate2/v1/crate/examples/gzbuilder.rs deleted file mode 100644 index d6ec2f4ce134..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/gzbuilder.rs +++ /dev/null @@ -1,24 +0,0 @@ -extern crate flate2; - -use flate2::Compression; -use flate2::GzBuilder; -use std::fs::File; -use std::io; -use std::io::prelude::*; - -// Compresses content of a text file into a gzip file -fn main() { - sample_builder().unwrap(); -} - -// GzBuilder opens a file and writes a sample string using Builder pattern -fn sample_builder() -> Result<(), io::Error> { - let f = File::create("examples/hello_world.txt.gz")?; - let mut gz = GzBuilder::new() - .filename("hello_world.txt") - .comment("test file, 
please delete") - .write(f, Compression::default()); - gz.write_all(b"hello world")?; - gz.finish()?; - Ok(()) -} diff --git a/third_party/rust/flate2/v1/crate/examples/gzdecoder-bufread.rs b/third_party/rust/flate2/v1/crate/examples/gzdecoder-bufread.rs deleted file mode 100644 index 855119739319..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/gzdecoder-bufread.rs +++ /dev/null @@ -1,23 +0,0 @@ -extern crate flate2; - -use flate2::write::GzEncoder; -use flate2::{bufread, Compression}; -use std::io; -use std::io::prelude::*; - -// Compress a sample string and print it after transformation. -fn main() { - let mut e = GzEncoder::new(Vec::new(), Compression::default()); - e.write_all(b"Hello World").unwrap(); - let bytes = e.finish().unwrap(); - println!("{}", decode_reader(bytes).unwrap()); -} - -// Uncompresses a Gz Encoded vector of bytes and returns a string or error -// Here &[u8] implements BufRead -fn decode_reader(bytes: Vec) -> io::Result { - let mut gz = bufread::GzDecoder::new(&bytes[..]); - let mut s = String::new(); - gz.read_to_string(&mut s)?; - Ok(s) -} diff --git a/third_party/rust/flate2/v1/crate/examples/gzdecoder-read.rs b/third_party/rust/flate2/v1/crate/examples/gzdecoder-read.rs deleted file mode 100644 index 705d28c2e19c..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/gzdecoder-read.rs +++ /dev/null @@ -1,23 +0,0 @@ -extern crate flate2; - -use flate2::write::GzEncoder; -use flate2::{read, Compression}; -use std::io; -use std::io::prelude::*; - -// Compress a sample string and print it after transformation. 
-fn main() { - let mut e = GzEncoder::new(Vec::new(), Compression::default()); - e.write_all(b"Hello World").unwrap(); - let bytes = e.finish().unwrap(); - println!("{}", decode_reader(bytes).unwrap()); -} - -// Uncompresses a Gz Encoded vector of bytes and returns a string or error -// Here &[u8] implements Read -fn decode_reader(bytes: Vec) -> io::Result { - let mut gz = read::GzDecoder::new(&bytes[..]); - let mut s = String::new(); - gz.read_to_string(&mut s)?; - Ok(s) -} diff --git a/third_party/rust/flate2/v1/crate/examples/gzdecoder-write.rs b/third_party/rust/flate2/v1/crate/examples/gzdecoder-write.rs deleted file mode 100644 index 766bb5e7560d..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/gzdecoder-write.rs +++ /dev/null @@ -1,26 +0,0 @@ -extern crate flate2; - -use flate2::write::{GzDecoder, GzEncoder}; -use flate2::Compression; -use std::io; -use std::io::prelude::*; - -// Compress a sample string and print it after transformation. -fn main() { - let mut e = GzEncoder::new(Vec::new(), Compression::default()); - e.write_all(b"Hello World").unwrap(); - let bytes = e.finish().unwrap(); - println!("{}", decode_writer(bytes).unwrap()); -} - -// Uncompresses a Gz Encoded vector of bytes and returns a string or error -// Here &[u8] implements Read -fn decode_writer(bytes: Vec) -> io::Result { - let mut writer = Vec::new(); - let mut decoder = GzDecoder::new(writer); - decoder.write_all(&bytes[..])?; - decoder.try_finish()?; - writer = decoder.finish()?; - let return_string = String::from_utf8(writer).expect("String parsing error"); - Ok(return_string) -} diff --git a/third_party/rust/flate2/v1/crate/examples/gzencoder-bufread.rs b/third_party/rust/flate2/v1/crate/examples/gzencoder-bufread.rs deleted file mode 100644 index 015ae0a82f07..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/gzencoder-bufread.rs +++ /dev/null @@ -1,24 +0,0 @@ -extern crate flate2; - -use flate2::bufread::GzEncoder; -use flate2::Compression; -use 
std::fs::File; -use std::io; -use std::io::prelude::*; -use std::io::BufReader; - -// Open file and debug print the contents compressed with gzip -fn main() { - println!("{:?}", open_hello_world().unwrap()); -} - -// Opens sample file, compresses the contents and returns a Vector or error -// File wrapped in a BufReader implements Bufread -fn open_hello_world() -> io::Result> { - let f = File::open("examples/hello_world.txt")?; - let b = BufReader::new(f); - let mut gz = GzEncoder::new(b, Compression::fast()); - let mut buffer = Vec::new(); - gz.read_to_end(&mut buffer)?; - Ok(buffer) -} diff --git a/third_party/rust/flate2/v1/crate/examples/gzencoder-read.rs b/third_party/rust/flate2/v1/crate/examples/gzencoder-read.rs deleted file mode 100644 index 3f1262b99ad4..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/gzencoder-read.rs +++ /dev/null @@ -1,20 +0,0 @@ -extern crate flate2; - -use flate2::read::GzEncoder; -use flate2::Compression; -use std::io; -use std::io::prelude::*; - -// Print the GZ compressed representation of hello world -fn main() { - println!("{:?}", gzencoder_read_hello_world().unwrap()); -} - -// Return a vector containing the GZ compressed version of hello world -fn gzencoder_read_hello_world() -> io::Result> { - let mut result = Vec::new(); - let c = b"hello world"; - let mut z = GzEncoder::new(&c[..], Compression::fast()); - z.read_to_end(&mut result)?; - Ok(result) -} diff --git a/third_party/rust/flate2/v1/crate/examples/gzencoder-write.rs b/third_party/rust/flate2/v1/crate/examples/gzencoder-write.rs deleted file mode 100644 index 275b010c5f96..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/gzencoder-write.rs +++ /dev/null @@ -1,12 +0,0 @@ -extern crate flate2; - -use flate2::write::GzEncoder; -use flate2::Compression; -use std::io::prelude::*; - -// Vec implements Write to print the compressed bytes of sample string -fn main() { - let mut e = GzEncoder::new(Vec::new(), Compression::default()); - 
e.write_all(b"Hello World").unwrap(); - println!("{:?}", e.finish().unwrap()); -} diff --git a/third_party/rust/flate2/v1/crate/examples/gzmultidecoder-bufread.rs b/third_party/rust/flate2/v1/crate/examples/gzmultidecoder-bufread.rs deleted file mode 100644 index c6bb2c5504d4..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/gzmultidecoder-bufread.rs +++ /dev/null @@ -1,24 +0,0 @@ -extern crate flate2; - -use flate2::bufread::MultiGzDecoder; -use flate2::write::GzEncoder; -use flate2::Compression; -use std::io; -use std::io::prelude::*; - -// Compress a sample string and print it after transformation. -fn main() { - let mut e = GzEncoder::new(Vec::new(), Compression::default()); - e.write_all(b"Hello World").unwrap(); - let bytes = e.finish().unwrap(); - println!("{}", decode_reader(bytes).unwrap()); -} - -// Uncompresses a Gz Encoded vector of bytes and returns a string or error -// Here &[u8] implements BufRead -fn decode_reader(bytes: Vec) -> io::Result { - let mut gz = MultiGzDecoder::new(&bytes[..]); - let mut s = String::new(); - gz.read_to_string(&mut s)?; - Ok(s) -} diff --git a/third_party/rust/flate2/v1/crate/examples/gzmultidecoder-read.rs b/third_party/rust/flate2/v1/crate/examples/gzmultidecoder-read.rs deleted file mode 100644 index 7c8a8e31689b..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/gzmultidecoder-read.rs +++ /dev/null @@ -1,24 +0,0 @@ -extern crate flate2; - -use flate2::read::MultiGzDecoder; -use flate2::write::GzEncoder; -use flate2::Compression; -use std::io; -use std::io::prelude::*; - -// Compress a sample string and print it after transformation. 
-fn main() { - let mut e = GzEncoder::new(Vec::new(), Compression::default()); - e.write_all(b"Hello World").unwrap(); - let bytes = e.finish().unwrap(); - println!("{}", decode_reader(bytes).unwrap()); -} - -// Uncompresses a Gz Encoded vector of bytes and returns a string or error -// Here &[u8] implements Read -fn decode_reader(bytes: Vec) -> io::Result { - let mut gz = MultiGzDecoder::new(&bytes[..]); - let mut s = String::new(); - gz.read_to_string(&mut s)?; - Ok(s) -} diff --git a/third_party/rust/flate2/v1/crate/examples/hello_world.txt b/third_party/rust/flate2/v1/crate/examples/hello_world.txt deleted file mode 100644 index 557db03de997..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/hello_world.txt +++ /dev/null @@ -1 +0,0 @@ -Hello World diff --git a/third_party/rust/flate2/v1/crate/examples/zlibdecoder-bufread.rs b/third_party/rust/flate2/v1/crate/examples/zlibdecoder-bufread.rs deleted file mode 100644 index 30f168a7b1ec..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/zlibdecoder-bufread.rs +++ /dev/null @@ -1,24 +0,0 @@ -extern crate flate2; - -use flate2::bufread::ZlibDecoder; -use flate2::write::ZlibEncoder; -use flate2::Compression; -use std::io; -use std::io::prelude::*; - -// Compress a sample string and print it after transformation. 
-fn main() { - let mut e = ZlibEncoder::new(Vec::new(), Compression::default()); - e.write_all(b"Hello World").unwrap(); - let bytes = e.finish().unwrap(); - println!("{}", decode_bufreader(bytes).unwrap()); -} - -// Uncompresses a Zlib Encoded vector of bytes and returns a string or error -// Here &[u8] implements BufRead -fn decode_bufreader(bytes: Vec) -> io::Result { - let mut z = ZlibDecoder::new(&bytes[..]); - let mut s = String::new(); - z.read_to_string(&mut s)?; - Ok(s) -} diff --git a/third_party/rust/flate2/v1/crate/examples/zlibdecoder-read.rs b/third_party/rust/flate2/v1/crate/examples/zlibdecoder-read.rs deleted file mode 100644 index f7e5fb0af69d..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/zlibdecoder-read.rs +++ /dev/null @@ -1,24 +0,0 @@ -extern crate flate2; - -use flate2::read::ZlibDecoder; -use flate2::write::ZlibEncoder; -use flate2::Compression; -use std::io; -use std::io::prelude::*; - -// Compress a sample string and print it after transformation. 
-fn main() { - let mut e = ZlibEncoder::new(Vec::new(), Compression::default()); - e.write_all(b"Hello World").unwrap(); - let bytes = e.finish().unwrap(); - println!("{}", decode_reader(bytes).unwrap()); -} - -// Uncompresses a Zlib Encoded vector of bytes and returns a string or error -// Here &[u8] implements Read -fn decode_reader(bytes: Vec) -> io::Result { - let mut z = ZlibDecoder::new(&bytes[..]); - let mut s = String::new(); - z.read_to_string(&mut s)?; - Ok(s) -} diff --git a/third_party/rust/flate2/v1/crate/examples/zlibdecoder-write.rs b/third_party/rust/flate2/v1/crate/examples/zlibdecoder-write.rs deleted file mode 100644 index 358e9035b18d..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/zlibdecoder-write.rs +++ /dev/null @@ -1,26 +0,0 @@ -extern crate flate2; - -use flate2::write::ZlibDecoder; -use flate2::write::ZlibEncoder; -use flate2::Compression; -use std::io; -use std::io::prelude::*; - -// Compress a sample string and print it after transformation. -fn main() { - let mut e = ZlibEncoder::new(Vec::new(), Compression::default()); - e.write_all(b"Hello World").unwrap(); - let bytes = e.finish().unwrap(); - println!("{}", decode_reader(bytes).unwrap()); -} - -// Uncompresses a Zlib Encoded vector of bytes and returns a string or error -// Here Vec implements Write -fn decode_reader(bytes: Vec) -> io::Result { - let mut writer = Vec::new(); - let mut z = ZlibDecoder::new(writer); - z.write_all(&bytes[..])?; - writer = z.finish()?; - let return_string = String::from_utf8(writer).expect("String parsing error"); - Ok(return_string) -} diff --git a/third_party/rust/flate2/v1/crate/examples/zlibencoder-bufread.rs b/third_party/rust/flate2/v1/crate/examples/zlibencoder-bufread.rs deleted file mode 100644 index 0321d8d38b37..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/zlibencoder-bufread.rs +++ /dev/null @@ -1,24 +0,0 @@ -extern crate flate2; - -use flate2::bufread::ZlibEncoder; -use flate2::Compression; -use std::fs::File; 
-use std::io; -use std::io::prelude::*; -use std::io::BufReader; - -// Open file and debug print the contents compressed with zlib -fn main() { - println!("{:?}", open_hello_world().unwrap()); -} - -// Opens sample file, compresses the contents and returns a Vector or error -// File wrapped in a BufReader implements Bufread -fn open_hello_world() -> io::Result> { - let f = File::open("examples/hello_world.txt")?; - let b = BufReader::new(f); - let mut z = ZlibEncoder::new(b, Compression::fast()); - let mut buffer = Vec::new(); - z.read_to_end(&mut buffer)?; - Ok(buffer) -} diff --git a/third_party/rust/flate2/v1/crate/examples/zlibencoder-read.rs b/third_party/rust/flate2/v1/crate/examples/zlibencoder-read.rs deleted file mode 100644 index 779eb1d31b75..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/zlibencoder-read.rs +++ /dev/null @@ -1,21 +0,0 @@ -extern crate flate2; - -use flate2::read::ZlibEncoder; -use flate2::Compression; -use std::fs::File; -use std::io::prelude::*; - -// Open file and debug print the compressed contents -fn main() { - println!("{:?}", open_hello_world().unwrap()); -} - -// Opens sample file, compresses the contents and returns a Vector or error -// File implements Read -fn open_hello_world() -> std::io::Result> { - let f = File::open("examples/hello_world.txt")?; - let mut z = ZlibEncoder::new(f, Compression::fast()); - let mut result = Vec::new(); - z.read_to_end(&mut result)?; - Ok(result) -} diff --git a/third_party/rust/flate2/v1/crate/examples/zlibencoder-write.rs b/third_party/rust/flate2/v1/crate/examples/zlibencoder-write.rs deleted file mode 100644 index 76bcf17b17b1..000000000000 --- a/third_party/rust/flate2/v1/crate/examples/zlibencoder-write.rs +++ /dev/null @@ -1,12 +0,0 @@ -extern crate flate2; - -use flate2::write::ZlibEncoder; -use flate2::Compression; -use std::io::prelude::*; - -// Vec implements Write to print the compressed bytes of sample string -fn main() { - let mut e = ZlibEncoder::new(Vec::new(), 
Compression::default()); - e.write_all(b"Hello World").unwrap(); - println!("{:?}", e.finish().unwrap()); -} diff --git a/third_party/rust/flate2/v1/crate/src/bufreader.rs b/third_party/rust/flate2/v1/crate/src/bufreader.rs deleted file mode 100644 index 7e6f89d68810..000000000000 --- a/third_party/rust/flate2/v1/crate/src/bufreader.rs +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright 2013 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// . -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -use std::cmp; -use std::io; -use std::io::prelude::*; -use std::mem; - -pub struct BufReader { - inner: R, - buf: Box<[u8]>, - pos: usize, - cap: usize, -} - -impl ::std::fmt::Debug for BufReader -where - R: ::std::fmt::Debug, -{ - fn fmt(&self, fmt: &mut ::std::fmt::Formatter) -> Result<(), ::std::fmt::Error> { - fmt.debug_struct("BufReader") - .field("reader", &self.inner) - .field( - "buffer", - &format_args!("{}/{}", self.cap - self.pos, self.buf.len()), - ) - .finish() - } -} - -impl BufReader { - pub fn new(inner: R) -> BufReader { - BufReader::with_buf(vec![0; 32 * 1024], inner) - } - - pub fn with_buf(buf: Vec, inner: R) -> BufReader { - BufReader { - inner, - buf: buf.into_boxed_slice(), - pos: 0, - cap: 0, - } - } -} - -impl BufReader { - pub fn get_ref(&self) -> &R { - &self.inner - } - - pub fn get_mut(&mut self) -> &mut R { - &mut self.inner - } - - pub fn into_inner(self) -> R { - self.inner - } - - pub fn reset(&mut self, inner: R) -> R { - self.pos = 0; - self.cap = 0; - mem::replace(&mut self.inner, inner) - } -} - -impl Read for BufReader { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - // If we don't have any buffered data and we're doing a massive read - // (larger than our internal buffer), bypass our internal buffer - // entirely. 
- if self.pos == self.cap && buf.len() >= self.buf.len() { - return self.inner.read(buf); - } - let nread = { - let mut rem = self.fill_buf()?; - rem.read(buf)? - }; - self.consume(nread); - Ok(nread) - } -} - -impl BufRead for BufReader { - fn fill_buf(&mut self) -> io::Result<&[u8]> { - // If we've reached the end of our internal buffer then we need to fetch - // some more data from the underlying reader. - if self.pos == self.cap { - self.cap = self.inner.read(&mut self.buf)?; - self.pos = 0; - } - Ok(&self.buf[self.pos..self.cap]) - } - - fn consume(&mut self, amt: usize) { - self.pos = cmp::min(self.pos + amt, self.cap); - } -} diff --git a/third_party/rust/flate2/v1/crate/src/crc.rs b/third_party/rust/flate2/v1/crate/src/crc.rs deleted file mode 100644 index cd00cebe1836..000000000000 --- a/third_party/rust/flate2/v1/crate/src/crc.rs +++ /dev/null @@ -1,184 +0,0 @@ -//! Simple CRC bindings backed by miniz.c - -use std::io; -use std::io::prelude::*; - -use crc32fast::Hasher; - -/// The CRC calculated by a [`CrcReader`]. -/// -/// [`CrcReader`]: struct.CrcReader.html -#[derive(Debug)] -pub struct Crc { - amt: u32, - hasher: Hasher, -} - -/// A wrapper around a [`Read`] that calculates the CRC. -/// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html -#[derive(Debug)] -pub struct CrcReader { - inner: R, - crc: Crc, -} - -impl Default for Crc { - fn default() -> Self { - Self::new() - } -} - -impl Crc { - /// Create a new CRC. - pub fn new() -> Crc { - Crc { - amt: 0, - hasher: Hasher::new(), - } - } - - /// Returns the current crc32 checksum. - pub fn sum(&self) -> u32 { - self.hasher.clone().finalize() - } - - /// The number of bytes that have been used to calculate the CRC. - /// This value is only accurate if the amount is lower than 232. - pub fn amount(&self) -> u32 { - self.amt - } - - /// Update the CRC with the bytes in `data`. 
- pub fn update(&mut self, data: &[u8]) { - self.amt = self.amt.wrapping_add(data.len() as u32); - self.hasher.update(data); - } - - /// Reset the CRC. - pub fn reset(&mut self) { - self.amt = 0; - self.hasher.reset(); - } - - /// Combine the CRC with the CRC for the subsequent block of bytes. - pub fn combine(&mut self, additional_crc: &Crc) { - self.amt += additional_crc.amt; - self.hasher.combine(&additional_crc.hasher); - } -} - -impl CrcReader { - /// Create a new CrcReader. - pub fn new(r: R) -> CrcReader { - CrcReader { - inner: r, - crc: Crc::new(), - } - } -} - -impl CrcReader { - /// Get the Crc for this CrcReader. - pub fn crc(&self) -> &Crc { - &self.crc - } - - /// Get the reader that is wrapped by this CrcReader. - pub fn into_inner(self) -> R { - self.inner - } - - /// Get the reader that is wrapped by this CrcReader by reference. - pub fn get_ref(&self) -> &R { - &self.inner - } - - /// Get a mutable reference to the reader that is wrapped by this CrcReader. - pub fn get_mut(&mut self) -> &mut R { - &mut self.inner - } - - /// Reset the Crc in this CrcReader. - pub fn reset(&mut self) { - self.crc.reset(); - } -} - -impl Read for CrcReader { - fn read(&mut self, into: &mut [u8]) -> io::Result { - let amt = self.inner.read(into)?; - self.crc.update(&into[..amt]); - Ok(amt) - } -} - -impl BufRead for CrcReader { - fn fill_buf(&mut self) -> io::Result<&[u8]> { - self.inner.fill_buf() - } - fn consume(&mut self, amt: usize) { - if let Ok(data) = self.inner.fill_buf() { - self.crc.update(&data[..amt]); - } - self.inner.consume(amt); - } -} - -/// A wrapper around a [`Write`] that calculates the CRC. -/// -/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html -#[derive(Debug)] -pub struct CrcWriter { - inner: W, - crc: Crc, -} - -impl CrcWriter { - /// Get the Crc for this CrcWriter. - pub fn crc(&self) -> &Crc { - &self.crc - } - - /// Get the writer that is wrapped by this CrcWriter. 
- pub fn into_inner(self) -> W { - self.inner - } - - /// Get the writer that is wrapped by this CrcWriter by reference. - pub fn get_ref(&self) -> &W { - &self.inner - } - - /// Get a mutable reference to the writer that is wrapped by this CrcWriter. - pub fn get_mut(&mut self) -> &mut W { - &mut self.inner - } - - /// Reset the Crc in this CrcWriter. - pub fn reset(&mut self) { - self.crc.reset(); - } -} - -impl CrcWriter { - /// Create a new CrcWriter. - pub fn new(w: W) -> CrcWriter { - CrcWriter { - inner: w, - crc: Crc::new(), - } - } -} - -impl Write for CrcWriter { - fn write(&mut self, buf: &[u8]) -> io::Result { - let amt = self.inner.write(buf)?; - self.crc.update(&buf[..amt]); - Ok(amt) - } - - fn flush(&mut self) -> io::Result<()> { - self.inner.flush() - } -} diff --git a/third_party/rust/flate2/v1/crate/src/deflate/bufread.rs b/third_party/rust/flate2/v1/crate/src/deflate/bufread.rs deleted file mode 100644 index f0b29e0b4dab..000000000000 --- a/third_party/rust/flate2/v1/crate/src/deflate/bufread.rs +++ /dev/null @@ -1,243 +0,0 @@ -use std::io; -use std::io::prelude::*; -use std::mem; - -use crate::zio; -use crate::{Compress, Decompress}; - -/// A DEFLATE encoder, or compressor. -/// -/// This structure consumes a [`BufRead`] interface, reading uncompressed data -/// from the underlying reader, and emitting compressed data. 
-/// -/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html -/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// use std::io; -/// use flate2::Compression; -/// use flate2::bufread::DeflateEncoder; -/// use std::fs::File; -/// use std::io::BufReader; -/// -/// # fn main() { -/// # println!("{:?}", open_hello_world().unwrap()); -/// # } -/// # -/// // Opens sample file, compresses the contents and returns a Vector -/// fn open_hello_world() -> io::Result> { -/// let f = File::open("examples/hello_world.txt")?; -/// let b = BufReader::new(f); -/// let mut deflater = DeflateEncoder::new(b, Compression::fast()); -/// let mut buffer = Vec::new(); -/// deflater.read_to_end(&mut buffer)?; -/// Ok(buffer) -/// } -/// ``` -#[derive(Debug)] -pub struct DeflateEncoder { - obj: R, - data: Compress, -} - -impl DeflateEncoder { - /// Creates a new encoder which will read uncompressed data from the given - /// stream and emit the compressed stream. - pub fn new(r: R, level: crate::Compression) -> DeflateEncoder { - DeflateEncoder { - obj: r, - data: Compress::new(level, false), - } - } -} - -pub fn reset_encoder_data(zlib: &mut DeflateEncoder) { - zlib.data.reset(); -} - -impl DeflateEncoder { - /// Resets the state of this encoder entirely, swapping out the input - /// stream for another. - /// - /// This function will reset the internal state of this encoder and replace - /// the input stream with the one provided, returning the previous input - /// stream. Future data read from this encoder will be the compressed - /// version of `r`'s data. - pub fn reset(&mut self, r: R) -> R { - reset_encoder_data(self); - mem::replace(&mut self.obj, r) - } - - /// Acquires a reference to the underlying reader - pub fn get_ref(&self) -> &R { - &self.obj - } - - /// Acquires a mutable reference to the underlying stream - /// - /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. 
- pub fn get_mut(&mut self) -> &mut R { - &mut self.obj - } - - /// Consumes this encoder, returning the underlying reader. - pub fn into_inner(self) -> R { - self.obj - } - - /// Returns the number of bytes that have been read into this compressor. - /// - /// Note that not all bytes read from the underlying object may be accounted - /// for, there may still be some active buffering. - pub fn total_in(&self) -> u64 { - self.data.total_in() - } - - /// Returns the number of bytes that the compressor has produced. - /// - /// Note that not all bytes may have been read yet, some may still be - /// buffered. - pub fn total_out(&self) -> u64 { - self.data.total_out() - } -} - -impl Read for DeflateEncoder { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - zio::read(&mut self.obj, &mut self.data, buf) - } -} - -impl Write for DeflateEncoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.get_mut().write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.get_mut().flush() - } -} - -/// A DEFLATE decoder, or decompressor. -/// -/// This structure consumes a [`BufRead`] interface, reading compressed data -/// from the underlying reader, and emitting uncompressed data. 
-/// -/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html -/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// use std::io; -/// # use flate2::Compression; -/// # use flate2::write::DeflateEncoder; -/// use flate2::bufread::DeflateDecoder; -/// -/// # fn main() { -/// # let mut e = DeflateEncoder::new(Vec::new(), Compression::default()); -/// # e.write_all(b"Hello World").unwrap(); -/// # let bytes = e.finish().unwrap(); -/// # println!("{}", decode_reader(bytes).unwrap()); -/// # } -/// // Uncompresses a Deflate Encoded vector of bytes and returns a string or error -/// // Here &[u8] implements Read -/// fn decode_reader(bytes: Vec) -> io::Result { -/// let mut deflater = DeflateDecoder::new(&bytes[..]); -/// let mut s = String::new(); -/// deflater.read_to_string(&mut s)?; -/// Ok(s) -/// } -/// ``` -#[derive(Debug)] -pub struct DeflateDecoder { - obj: R, - data: Decompress, -} - -pub fn reset_decoder_data(zlib: &mut DeflateDecoder) { - zlib.data = Decompress::new(false); -} - -impl DeflateDecoder { - /// Creates a new decoder which will decompress data read from the given - /// stream. - pub fn new(r: R) -> DeflateDecoder { - DeflateDecoder { - obj: r, - data: Decompress::new(false), - } - } -} - -impl DeflateDecoder { - /// Resets the state of this decoder entirely, swapping out the input - /// stream for another. - /// - /// This will reset the internal state of this decoder and replace the - /// input stream with the one provided, returning the previous input - /// stream. Future data read from this decoder will be the decompressed - /// version of `r`'s data. - pub fn reset(&mut self, r: R) -> R { - reset_decoder_data(self); - mem::replace(&mut self.obj, r) - } - - /// Resets the state of this decoder's data - /// - /// This will reset the internal state of this decoder. It will continue - /// reading from the same stream. 
- pub fn reset_data(&mut self) { - reset_decoder_data(self); - } - - /// Acquires a reference to the underlying stream - pub fn get_ref(&self) -> &R { - &self.obj - } - - /// Acquires a mutable reference to the underlying stream - /// - /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. - pub fn get_mut(&mut self) -> &mut R { - &mut self.obj - } - - /// Consumes this decoder, returning the underlying reader. - pub fn into_inner(self) -> R { - self.obj - } - - /// Returns the number of bytes that the decompressor has consumed. - /// - /// Note that this will likely be smaller than what the decompressor - /// actually read from the underlying stream due to buffering. - pub fn total_in(&self) -> u64 { - self.data.total_in() - } - - /// Returns the number of bytes that the decompressor has produced. - pub fn total_out(&self) -> u64 { - self.data.total_out() - } -} - -impl Read for DeflateDecoder { - fn read(&mut self, into: &mut [u8]) -> io::Result { - zio::read(&mut self.obj, &mut self.data, into) - } -} - -impl Write for DeflateDecoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.get_mut().write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.get_mut().flush() - } -} diff --git a/third_party/rust/flate2/v1/crate/src/deflate/mod.rs b/third_party/rust/flate2/v1/crate/src/deflate/mod.rs deleted file mode 100644 index 51758b30ae4a..000000000000 --- a/third_party/rust/flate2/v1/crate/src/deflate/mod.rs +++ /dev/null @@ -1,193 +0,0 @@ -pub mod bufread; -pub mod read; -pub mod write; - -#[cfg(test)] -mod tests { - use std::io::prelude::*; - - use rand::{thread_rng, Rng}; - - use super::{read, write}; - use crate::Compression; - - #[test] - fn roundtrip() { - let mut real = Vec::new(); - let mut w = write::DeflateEncoder::new(Vec::new(), Compression::default()); - let v = crate::random_bytes().take(1024).collect::>(); - for _ in 0..200 { - let to_write = 
&v[..thread_rng().gen_range(0..v.len())]; - real.extend(to_write.iter().map(|x| *x)); - w.write_all(to_write).unwrap(); - } - let result = w.finish().unwrap(); - let mut r = read::DeflateDecoder::new(&result[..]); - let mut ret = Vec::new(); - r.read_to_end(&mut ret).unwrap(); - assert!(ret == real); - } - - #[test] - fn drop_writes() { - let mut data = Vec::new(); - write::DeflateEncoder::new(&mut data, Compression::default()) - .write_all(b"foo") - .unwrap(); - let mut r = read::DeflateDecoder::new(&data[..]); - let mut ret = Vec::new(); - r.read_to_end(&mut ret).unwrap(); - assert!(ret == b"foo"); - } - - #[test] - fn total_in() { - let mut real = Vec::new(); - let mut w = write::DeflateEncoder::new(Vec::new(), Compression::default()); - let v = crate::random_bytes().take(1024).collect::>(); - for _ in 0..200 { - let to_write = &v[..thread_rng().gen_range(0..v.len())]; - real.extend(to_write.iter().map(|x| *x)); - w.write_all(to_write).unwrap(); - } - let mut result = w.finish().unwrap(); - - let result_len = result.len(); - - for _ in 0..200 { - result.extend(v.iter().map(|x| *x)); - } - - let mut r = read::DeflateDecoder::new(&result[..]); - let mut ret = Vec::new(); - r.read_to_end(&mut ret).unwrap(); - assert!(ret == real); - assert_eq!(r.total_in(), result_len as u64); - } - - #[test] - fn roundtrip2() { - let v = crate::random_bytes().take(1024 * 1024).collect::>(); - let mut r = - read::DeflateDecoder::new(read::DeflateEncoder::new(&v[..], Compression::default())); - let mut ret = Vec::new(); - r.read_to_end(&mut ret).unwrap(); - assert_eq!(ret, v); - } - - #[test] - fn roundtrip3() { - let v = crate::random_bytes().take(1024 * 1024).collect::>(); - let mut w = write::DeflateEncoder::new( - write::DeflateDecoder::new(Vec::new()), - Compression::default(), - ); - w.write_all(&v).unwrap(); - let w = w.finish().unwrap().finish().unwrap(); - assert!(w == v); - } - - #[test] - fn reset_writer() { - let v = crate::random_bytes().take(1024 * 1024).collect::>(); 
- let mut w = write::DeflateEncoder::new(Vec::new(), Compression::default()); - w.write_all(&v).unwrap(); - let a = w.reset(Vec::new()).unwrap(); - w.write_all(&v).unwrap(); - let b = w.finish().unwrap(); - - let mut w = write::DeflateEncoder::new(Vec::new(), Compression::default()); - w.write_all(&v).unwrap(); - let c = w.finish().unwrap(); - assert!(a == b && b == c); - } - - #[test] - fn reset_reader() { - let v = crate::random_bytes().take(1024 * 1024).collect::>(); - let (mut a, mut b, mut c) = (Vec::new(), Vec::new(), Vec::new()); - let mut r = read::DeflateEncoder::new(&v[..], Compression::default()); - r.read_to_end(&mut a).unwrap(); - r.reset(&v[..]); - r.read_to_end(&mut b).unwrap(); - - let mut r = read::DeflateEncoder::new(&v[..], Compression::default()); - r.read_to_end(&mut c).unwrap(); - assert!(a == b && b == c); - } - - #[test] - fn reset_decoder() { - let v = crate::random_bytes().take(1024 * 1024).collect::>(); - let mut w = write::DeflateEncoder::new(Vec::new(), Compression::default()); - w.write_all(&v).unwrap(); - let data = w.finish().unwrap(); - - { - let (mut a, mut b, mut c) = (Vec::new(), Vec::new(), Vec::new()); - let mut r = read::DeflateDecoder::new(&data[..]); - r.read_to_end(&mut a).unwrap(); - r.reset(&data); - r.read_to_end(&mut b).unwrap(); - - let mut r = read::DeflateDecoder::new(&data[..]); - r.read_to_end(&mut c).unwrap(); - assert!(a == b && b == c && c == v); - } - - { - let mut w = write::DeflateDecoder::new(Vec::new()); - w.write_all(&data).unwrap(); - let a = w.reset(Vec::new()).unwrap(); - w.write_all(&data).unwrap(); - let b = w.finish().unwrap(); - - let mut w = write::DeflateDecoder::new(Vec::new()); - w.write_all(&data).unwrap(); - let c = w.finish().unwrap(); - assert!(a == b && b == c && c == v); - } - } - - #[test] - fn zero_length_read_with_data() { - let m = vec![3u8; 128 * 1024 + 1]; - let mut c = read::DeflateEncoder::new(&m[..], Compression::default()); - - let mut result = Vec::new(); - c.read_to_end(&mut 
result).unwrap(); - - let mut d = read::DeflateDecoder::new(&result[..]); - let mut data = Vec::new(); - assert!(d.read(&mut data).unwrap() == 0); - } - - #[test] - fn qc_reader() { - ::quickcheck::quickcheck(test as fn(_) -> _); - - fn test(v: Vec) -> bool { - let mut r = read::DeflateDecoder::new(read::DeflateEncoder::new( - &v[..], - Compression::default(), - )); - let mut v2 = Vec::new(); - r.read_to_end(&mut v2).unwrap(); - v == v2 - } - } - - #[test] - fn qc_writer() { - ::quickcheck::quickcheck(test as fn(_) -> _); - - fn test(v: Vec) -> bool { - let mut w = write::DeflateEncoder::new( - write::DeflateDecoder::new(Vec::new()), - Compression::default(), - ); - w.write_all(&v).unwrap(); - v == w.finish().unwrap().finish().unwrap() - } - } -} diff --git a/third_party/rust/flate2/v1/crate/src/deflate/read.rs b/third_party/rust/flate2/v1/crate/src/deflate/read.rs deleted file mode 100644 index fd17a894af81..000000000000 --- a/third_party/rust/flate2/v1/crate/src/deflate/read.rs +++ /dev/null @@ -1,241 +0,0 @@ -use std::io; -use std::io::prelude::*; - -use super::bufread; -use crate::bufreader::BufReader; - -/// A DEFLATE encoder, or compressor. -/// -/// This structure implements a [`Read`] interface and will read uncompressed -/// data from an underlying stream and emit a stream of compressed data. 
-/// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html -/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// use std::io; -/// use flate2::Compression; -/// use flate2::read::DeflateEncoder; -/// -/// # fn main() { -/// # println!("{:?}", deflateencoder_read_hello_world().unwrap()); -/// # } -/// # -/// // Return a vector containing the Deflate compressed version of hello world -/// fn deflateencoder_read_hello_world() -> io::Result> { -/// let mut ret_vec = [0;100]; -/// let c = b"hello world"; -/// let mut deflater = DeflateEncoder::new(&c[..], Compression::fast()); -/// let count = deflater.read(&mut ret_vec)?; -/// Ok(ret_vec[0..count].to_vec()) -/// } -/// ``` -#[derive(Debug)] -pub struct DeflateEncoder { - inner: bufread::DeflateEncoder>, -} - -impl DeflateEncoder { - /// Creates a new encoder which will read uncompressed data from the given - /// stream and emit the compressed stream. - pub fn new(r: R, level: crate::Compression) -> DeflateEncoder { - DeflateEncoder { - inner: bufread::DeflateEncoder::new(BufReader::new(r), level), - } - } -} - -impl DeflateEncoder { - /// Resets the state of this encoder entirely, swapping out the input - /// stream for another. - /// - /// This function will reset the internal state of this encoder and replace - /// the input stream with the one provided, returning the previous input - /// stream. Future data read from this encoder will be the compressed - /// version of `r`'s data. - /// - /// Note that there may be currently buffered data when this function is - /// called, and in that case the buffered data is discarded. 
- pub fn reset(&mut self, r: R) -> R { - super::bufread::reset_encoder_data(&mut self.inner); - self.inner.get_mut().reset(r) - } - - /// Acquires a reference to the underlying reader - pub fn get_ref(&self) -> &R { - self.inner.get_ref().get_ref() - } - - /// Acquires a mutable reference to the underlying stream - /// - /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. - pub fn get_mut(&mut self) -> &mut R { - self.inner.get_mut().get_mut() - } - - /// Consumes this encoder, returning the underlying reader. - /// - /// Note that there may be buffered bytes which are not re-acquired as part - /// of this transition. It's recommended to only call this function after - /// EOF has been reached. - pub fn into_inner(self) -> R { - self.inner.into_inner().into_inner() - } - - /// Returns the number of bytes that have been read into this compressor. - /// - /// Note that not all bytes read from the underlying object may be accounted - /// for, there may still be some active buffering. - pub fn total_in(&self) -> u64 { - self.inner.total_in() - } - - /// Returns the number of bytes that the compressor has produced. - /// - /// Note that not all bytes may have been read yet, some may still be - /// buffered. - pub fn total_out(&self) -> u64 { - self.inner.total_out() - } -} - -impl Read for DeflateEncoder { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.inner.read(buf) - } -} - -impl Write for DeflateEncoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.get_mut().write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.get_mut().flush() - } -} - -/// A DEFLATE decoder, or decompressor. -/// -/// This structure implements a [`Read`] interface and takes a stream of -/// compressed data as input, providing the decompressed data when read from. 
-/// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html -/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// use std::io; -/// # use flate2::Compression; -/// # use flate2::write::DeflateEncoder; -/// use flate2::read::DeflateDecoder; -/// -/// # fn main() { -/// # let mut e = DeflateEncoder::new(Vec::new(), Compression::default()); -/// # e.write_all(b"Hello World").unwrap(); -/// # let bytes = e.finish().unwrap(); -/// # println!("{}", decode_reader(bytes).unwrap()); -/// # } -/// // Uncompresses a Deflate Encoded vector of bytes and returns a string or error -/// // Here &[u8] implements Read -/// fn decode_reader(bytes: Vec) -> io::Result { -/// let mut deflater = DeflateDecoder::new(&bytes[..]); -/// let mut s = String::new(); -/// deflater.read_to_string(&mut s)?; -/// Ok(s) -/// } -/// ``` -#[derive(Debug)] -pub struct DeflateDecoder { - inner: bufread::DeflateDecoder>, -} - -impl DeflateDecoder { - /// Creates a new decoder which will decompress data read from the given - /// stream. - pub fn new(r: R) -> DeflateDecoder { - DeflateDecoder::new_with_buf(r, vec![0; 32 * 1024]) - } - - /// Same as `new`, but the intermediate buffer for data is specified. - /// - /// Note that the capacity of the intermediate buffer is never increased, - /// and it is recommended for it to be large. - pub fn new_with_buf(r: R, buf: Vec) -> DeflateDecoder { - DeflateDecoder { - inner: bufread::DeflateDecoder::new(BufReader::with_buf(buf, r)), - } - } -} - -impl DeflateDecoder { - /// Resets the state of this decoder entirely, swapping out the input - /// stream for another. - /// - /// This will reset the internal state of this decoder and replace the - /// input stream with the one provided, returning the previous input - /// stream. Future data read from this decoder will be the decompressed - /// version of `r`'s data. 
- /// - /// Note that there may be currently buffered data when this function is - /// called, and in that case the buffered data is discarded. - pub fn reset(&mut self, r: R) -> R { - super::bufread::reset_decoder_data(&mut self.inner); - self.inner.get_mut().reset(r) - } - - /// Acquires a reference to the underlying stream - pub fn get_ref(&self) -> &R { - self.inner.get_ref().get_ref() - } - - /// Acquires a mutable reference to the underlying stream - /// - /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. - pub fn get_mut(&mut self) -> &mut R { - self.inner.get_mut().get_mut() - } - - /// Consumes this decoder, returning the underlying reader. - /// - /// Note that there may be buffered bytes which are not re-acquired as part - /// of this transition. It's recommended to only call this function after - /// EOF has been reached. - pub fn into_inner(self) -> R { - self.inner.into_inner().into_inner() - } - - /// Returns the number of bytes that the decompressor has consumed. - /// - /// Note that this will likely be smaller than what the decompressor - /// actually read from the underlying stream due to buffering. - pub fn total_in(&self) -> u64 { - self.inner.total_in() - } - - /// Returns the number of bytes that the decompressor has produced. 
- pub fn total_out(&self) -> u64 { - self.inner.total_out() - } -} - -impl Read for DeflateDecoder { - fn read(&mut self, into: &mut [u8]) -> io::Result { - self.inner.read(into) - } -} - -impl Write for DeflateDecoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.get_mut().write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.get_mut().flush() - } -} diff --git a/third_party/rust/flate2/v1/crate/src/deflate/write.rs b/third_party/rust/flate2/v1/crate/src/deflate/write.rs deleted file mode 100644 index 2c44556ac4bb..000000000000 --- a/third_party/rust/flate2/v1/crate/src/deflate/write.rs +++ /dev/null @@ -1,322 +0,0 @@ -use std::io; -use std::io::prelude::*; - -use crate::zio; -use crate::{Compress, Decompress}; - -/// A DEFLATE encoder, or compressor. -/// -/// This structure implements a [`Write`] interface and takes a stream of -/// uncompressed data, writing the compressed data to the wrapped writer. -/// -/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html -/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// use flate2::Compression; -/// use flate2::write::DeflateEncoder; -/// -/// // Vec implements Write to print the compressed bytes of sample string -/// # fn main() { -/// -/// let mut e = DeflateEncoder::new(Vec::new(), Compression::default()); -/// e.write_all(b"Hello World").unwrap(); -/// println!("{:?}", e.finish().unwrap()); -/// # } -/// ``` -#[derive(Debug)] -pub struct DeflateEncoder { - inner: zio::Writer, -} - -impl DeflateEncoder { - /// Creates a new encoder which will write compressed data to the stream - /// given at the given compression level. - /// - /// When this encoder is dropped or unwrapped the final pieces of data will - /// be flushed. - pub fn new(w: W, level: crate::Compression) -> DeflateEncoder { - DeflateEncoder { - inner: zio::Writer::new(w, Compress::new(level, false)), - } - } - - /// Acquires a reference to the underlying writer. 
- pub fn get_ref(&self) -> &W { - self.inner.get_ref() - } - - /// Acquires a mutable reference to the underlying writer. - /// - /// Note that mutating the output/input state of the stream may corrupt this - /// object, so care must be taken when using this method. - pub fn get_mut(&mut self) -> &mut W { - self.inner.get_mut() - } - - /// Resets the state of this encoder entirely, swapping out the output - /// stream for another. - /// - /// This function will finish encoding the current stream into the current - /// output stream before swapping out the two output streams. If the stream - /// cannot be finished an error is returned. - /// - /// After the current stream has been finished, this will reset the internal - /// state of this encoder and replace the output stream with the one - /// provided, returning the previous output stream. Future data written to - /// this encoder will be the compressed into the stream `w` provided. - /// - /// # Errors - /// - /// This function will perform I/O to complete this stream, and any I/O - /// errors which occur will be returned from this function. - pub fn reset(&mut self, w: W) -> io::Result { - self.inner.finish()?; - self.inner.data.reset(); - Ok(self.inner.replace(w)) - } - - /// Attempt to finish this output stream, writing out final chunks of data. - /// - /// Note that this function can only be used once data has finished being - /// written to the output stream. After this function is called then further - /// calls to `write` may result in a panic. - /// - /// # Panics - /// - /// Attempts to write data to this stream may result in a panic after this - /// function is called. - /// - /// # Errors - /// - /// This function will perform I/O to complete this stream, and any I/O - /// errors which occur will be returned from this function. - pub fn try_finish(&mut self) -> io::Result<()> { - self.inner.finish() - } - - /// Consumes this encoder, flushing the output stream. 
- /// - /// This will flush the underlying data stream, close off the compressed - /// stream and, if successful, return the contained writer. - /// - /// Note that this function may not be suitable to call in a situation where - /// the underlying stream is an asynchronous I/O stream. To finish a stream - /// the `try_finish` (or `shutdown`) method should be used instead. To - /// re-acquire ownership of a stream it is safe to call this method after - /// `try_finish` or `shutdown` has returned `Ok`. - /// - /// # Errors - /// - /// This function will perform I/O to complete this stream, and any I/O - /// errors which occur will be returned from this function. - pub fn finish(mut self) -> io::Result { - self.inner.finish()?; - Ok(self.inner.take_inner()) - } - - /// Consumes this encoder, flushing the output stream. - /// - /// This will flush the underlying data stream and then return the contained - /// writer if the flush succeeded. - /// The compressed stream will not closed but only flushed. This - /// means that obtained byte array can by extended by another deflated - /// stream. To close the stream add the two bytes 0x3 and 0x0. - /// - /// # Errors - /// - /// This function will perform I/O to complete this stream, and any I/O - /// errors which occur will be returned from this function. - pub fn flush_finish(mut self) -> io::Result { - self.inner.flush()?; - Ok(self.inner.take_inner()) - } - - /// Returns the number of bytes that have been written to this compressor. - /// - /// Note that not all bytes written to this object may be accounted for, - /// there may still be some active buffering. - pub fn total_in(&self) -> u64 { - self.inner.data.total_in() - } - - /// Returns the number of bytes that the compressor has produced. - /// - /// Note that not all bytes may have been written yet, some may still be - /// buffered. 
- pub fn total_out(&self) -> u64 { - self.inner.data.total_out() - } -} - -impl Write for DeflateEncoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.inner.write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.inner.flush() - } -} - -impl Read for DeflateEncoder { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.inner.get_mut().read(buf) - } -} - -/// A DEFLATE decoder, or decompressor. -/// -/// This structure implements a [`Write`] and will emit a stream of decompressed -/// data when fed a stream of compressed data. -/// -/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Read.html -/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// use std::io; -/// # use flate2::Compression; -/// # use flate2::write::DeflateEncoder; -/// use flate2::write::DeflateDecoder; -/// -/// # fn main() { -/// # let mut e = DeflateEncoder::new(Vec::new(), Compression::default()); -/// # e.write_all(b"Hello World").unwrap(); -/// # let bytes = e.finish().unwrap(); -/// # println!("{}", decode_writer(bytes).unwrap()); -/// # } -/// // Uncompresses a Deflate Encoded vector of bytes and returns a string or error -/// // Here Vec implements Write -/// fn decode_writer(bytes: Vec) -> io::Result { -/// let mut writer = Vec::new(); -/// let mut deflater = DeflateDecoder::new(writer); -/// deflater.write_all(&bytes[..])?; -/// writer = deflater.finish()?; -/// let return_string = String::from_utf8(writer).expect("String parsing error"); -/// Ok(return_string) -/// } -/// ``` -#[derive(Debug)] -pub struct DeflateDecoder { - inner: zio::Writer, -} - -impl DeflateDecoder { - /// Creates a new decoder which will write uncompressed data to the stream. - /// - /// When this encoder is dropped or unwrapped the final pieces of data will - /// be flushed. - pub fn new(w: W) -> DeflateDecoder { - DeflateDecoder { - inner: zio::Writer::new(w, Decompress::new(false)), - } - } - - /// Acquires a reference to the underlying writer. 
- pub fn get_ref(&self) -> &W { - self.inner.get_ref() - } - - /// Acquires a mutable reference to the underlying writer. - /// - /// Note that mutating the output/input state of the stream may corrupt this - /// object, so care must be taken when using this method. - pub fn get_mut(&mut self) -> &mut W { - self.inner.get_mut() - } - - /// Resets the state of this decoder entirely, swapping out the output - /// stream for another. - /// - /// This function will finish encoding the current stream into the current - /// output stream before swapping out the two output streams. - /// - /// This will then reset the internal state of this decoder and replace the - /// output stream with the one provided, returning the previous output - /// stream. Future data written to this decoder will be decompressed into - /// the output stream `w`. - /// - /// # Errors - /// - /// This function will perform I/O to finish the stream, and if that I/O - /// returns an error then that will be returned from this function. - pub fn reset(&mut self, w: W) -> io::Result { - self.inner.finish()?; - self.inner.data = Decompress::new(false); - Ok(self.inner.replace(w)) - } - - /// Attempt to finish this output stream, writing out final chunks of data. - /// - /// Note that this function can only be used once data has finished being - /// written to the output stream. After this function is called then further - /// calls to `write` may result in a panic. - /// - /// # Panics - /// - /// Attempts to write data to this stream may result in a panic after this - /// function is called. - /// - /// # Errors - /// - /// This function will perform I/O to finish the stream, returning any - /// errors which happen. - pub fn try_finish(&mut self) -> io::Result<()> { - self.inner.finish() - } - - /// Consumes this encoder, flushing the output stream. - /// - /// This will flush the underlying data stream and then return the contained - /// writer if the flush succeeded. 
- /// - /// Note that this function may not be suitable to call in a situation where - /// the underlying stream is an asynchronous I/O stream. To finish a stream - /// the `try_finish` (or `shutdown`) method should be used instead. To - /// re-acquire ownership of a stream it is safe to call this method after - /// `try_finish` or `shutdown` has returned `Ok`. - /// - /// # Errors - /// - /// This function will perform I/O to complete this stream, and any I/O - /// errors which occur will be returned from this function. - pub fn finish(mut self) -> io::Result { - self.inner.finish()?; - Ok(self.inner.take_inner()) - } - - /// Returns the number of bytes that the decompressor has consumed for - /// decompression. - /// - /// Note that this will likely be smaller than the number of bytes - /// successfully written to this stream due to internal buffering. - pub fn total_in(&self) -> u64 { - self.inner.data.total_in() - } - - /// Returns the number of bytes that the decompressor has written to its - /// output stream. - pub fn total_out(&self) -> u64 { - self.inner.data.total_out() - } -} - -impl Write for DeflateDecoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.inner.write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.inner.flush() - } -} - -impl Read for DeflateDecoder { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.inner.get_mut().read(buf) - } -} diff --git a/third_party/rust/flate2/v1/crate/src/ffi/c.rs b/third_party/rust/flate2/v1/crate/src/ffi/c.rs deleted file mode 100644 index 59e20118fb8e..000000000000 --- a/third_party/rust/flate2/v1/crate/src/ffi/c.rs +++ /dev/null @@ -1,421 +0,0 @@ -//! Implementation for C backends. 
-use std::alloc::{self, Layout}; -use std::cmp; -use std::convert::TryFrom; -use std::fmt; -use std::marker; -use std::ops::{Deref, DerefMut}; -use std::os::raw::{c_int, c_uint, c_void}; -use std::ptr; - -use super::*; -use crate::mem::{self, FlushDecompress, Status}; - -#[derive(Default)] -pub struct ErrorMessage(Option<&'static str>); - -impl ErrorMessage { - pub fn get(&self) -> Option<&str> { - self.0 - } -} - -pub struct StreamWrapper { - pub inner: Box, -} - -impl fmt::Debug for StreamWrapper { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - write!(f, "StreamWrapper") - } -} - -impl Default for StreamWrapper { - fn default() -> StreamWrapper { - StreamWrapper { - inner: Box::new(mz_stream { - next_in: ptr::null_mut(), - avail_in: 0, - total_in: 0, - next_out: ptr::null_mut(), - avail_out: 0, - total_out: 0, - msg: ptr::null_mut(), - adler: 0, - data_type: 0, - reserved: 0, - opaque: ptr::null_mut(), - state: ptr::null_mut(), - #[cfg(all(feature = "any_zlib", not(feature = "cloudflare-zlib-sys")))] - zalloc, - #[cfg(all(feature = "any_zlib", not(feature = "cloudflare-zlib-sys")))] - zfree, - #[cfg(not(all(feature = "any_zlib", not(feature = "cloudflare-zlib-sys"))))] - zalloc: Some(zalloc), - #[cfg(not(all(feature = "any_zlib", not(feature = "cloudflare-zlib-sys"))))] - zfree: Some(zfree), - }), - } - } -} - -const ALIGN: usize = std::mem::align_of::(); - -fn align_up(size: usize, align: usize) -> usize { - (size + align - 1) & !(align - 1) -} - -extern "C" fn zalloc(_ptr: *mut c_void, items: AllocSize, item_size: AllocSize) -> *mut c_void { - // We need to multiply `items` and `item_size` to get the actual desired - // allocation size. Since `zfree` doesn't receive a size argument we - // also need to allocate space for a `usize` as a header so we can store - // how large the allocation is to deallocate later. 
- let size = match items - .checked_mul(item_size) - .and_then(|i| usize::try_from(i).ok()) - .map(|size| align_up(size, ALIGN)) - .and_then(|i| i.checked_add(std::mem::size_of::())) - { - Some(i) => i, - None => return ptr::null_mut(), - }; - - // Make sure the `size` isn't too big to fail `Layout`'s restrictions - let layout = match Layout::from_size_align(size, ALIGN) { - Ok(layout) => layout, - Err(_) => return ptr::null_mut(), - }; - - unsafe { - // Allocate the data, and if successful store the size we allocated - // at the beginning and then return an offset pointer. - let ptr = alloc::alloc(layout) as *mut usize; - if ptr.is_null() { - return ptr as *mut c_void; - } - *ptr = size; - ptr.add(1) as *mut c_void - } -} - -extern "C" fn zfree(_ptr: *mut c_void, address: *mut c_void) { - unsafe { - // Move our address being freed back one pointer, read the size we - // stored in `zalloc`, and then free it using the standard Rust - // allocator. - let ptr = (address as *mut usize).offset(-1); - let size = *ptr; - let layout = Layout::from_size_align_unchecked(size, ALIGN); - alloc::dealloc(ptr as *mut u8, layout) - } -} - -impl Deref for StreamWrapper { - type Target = mz_stream; - - fn deref(&self) -> &Self::Target { - &*self.inner - } -} - -impl DerefMut for StreamWrapper { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut *self.inner - } -} - -unsafe impl Send for Stream {} -unsafe impl Sync for Stream {} - -/// Trait used to call the right destroy/end function on the inner -/// stream object on drop. 
-pub trait Direction { - unsafe fn destroy(stream: *mut mz_stream) -> c_int; -} - -#[derive(Debug)] -pub enum DirCompress {} -#[derive(Debug)] -pub enum DirDecompress {} - -#[derive(Debug)] -pub struct Stream { - pub stream_wrapper: StreamWrapper, - pub total_in: u64, - pub total_out: u64, - pub _marker: marker::PhantomData, -} - -impl Stream { - pub fn msg(&self) -> ErrorMessage { - let msg = self.stream_wrapper.msg; - ErrorMessage(if msg.is_null() { - None - } else { - let s = unsafe { std::ffi::CStr::from_ptr(msg) }; - std::str::from_utf8(s.to_bytes()).ok() - }) - } -} - -impl Drop for Stream { - fn drop(&mut self) { - unsafe { - let _ = D::destroy(&mut *self.stream_wrapper); - } - } -} - -impl Direction for DirCompress { - unsafe fn destroy(stream: *mut mz_stream) -> c_int { - mz_deflateEnd(stream) - } -} -impl Direction for DirDecompress { - unsafe fn destroy(stream: *mut mz_stream) -> c_int { - mz_inflateEnd(stream) - } -} - -#[derive(Debug)] -pub struct Inflate { - pub inner: Stream, -} - -impl InflateBackend for Inflate { - fn make(zlib_header: bool, window_bits: u8) -> Self { - unsafe { - let mut state = StreamWrapper::default(); - let ret = mz_inflateInit2( - &mut *state, - if zlib_header { - window_bits as c_int - } else { - -(window_bits as c_int) - }, - ); - assert_eq!(ret, 0); - Inflate { - inner: Stream { - stream_wrapper: state, - total_in: 0, - total_out: 0, - _marker: marker::PhantomData, - }, - } - } - } - - fn decompress( - &mut self, - input: &[u8], - output: &mut [u8], - flush: FlushDecompress, - ) -> Result { - let raw = &mut *self.inner.stream_wrapper; - raw.msg = ptr::null_mut(); - raw.next_in = input.as_ptr() as *mut u8; - raw.avail_in = cmp::min(input.len(), c_uint::max_value() as usize) as c_uint; - raw.next_out = output.as_mut_ptr(); - raw.avail_out = cmp::min(output.len(), c_uint::max_value() as usize) as c_uint; - - let rc = unsafe { mz_inflate(raw, flush as c_int) }; - - // Unfortunately the total counters provided by zlib might be 
only - // 32 bits wide and overflow while processing large amounts of data. - self.inner.total_in += (raw.next_in as usize - input.as_ptr() as usize) as u64; - self.inner.total_out += (raw.next_out as usize - output.as_ptr() as usize) as u64; - - match rc { - MZ_DATA_ERROR | MZ_STREAM_ERROR => mem::decompress_failed(self.inner.msg()), - MZ_OK => Ok(Status::Ok), - MZ_BUF_ERROR => Ok(Status::BufError), - MZ_STREAM_END => Ok(Status::StreamEnd), - MZ_NEED_DICT => mem::decompress_need_dict(raw.adler as u32), - c => panic!("unknown return code: {}", c), - } - } - - fn reset(&mut self, zlib_header: bool) { - let bits = if zlib_header { - MZ_DEFAULT_WINDOW_BITS - } else { - -MZ_DEFAULT_WINDOW_BITS - }; - unsafe { - inflateReset2(&mut *self.inner.stream_wrapper, bits); - } - self.inner.total_out = 0; - self.inner.total_in = 0; - } -} - -impl Backend for Inflate { - #[inline] - fn total_in(&self) -> u64 { - self.inner.total_in - } - - #[inline] - fn total_out(&self) -> u64 { - self.inner.total_out - } -} - -#[derive(Debug)] -pub struct Deflate { - pub inner: Stream, -} - -impl DeflateBackend for Deflate { - fn make(level: Compression, zlib_header: bool, window_bits: u8) -> Self { - unsafe { - let mut state = StreamWrapper::default(); - let ret = mz_deflateInit2( - &mut *state, - level.0 as c_int, - MZ_DEFLATED, - if zlib_header { - window_bits as c_int - } else { - -(window_bits as c_int) - }, - 8, - MZ_DEFAULT_STRATEGY, - ); - assert_eq!(ret, 0); - Deflate { - inner: Stream { - stream_wrapper: state, - total_in: 0, - total_out: 0, - _marker: marker::PhantomData, - }, - } - } - } - fn compress( - &mut self, - input: &[u8], - output: &mut [u8], - flush: FlushCompress, - ) -> Result { - let raw = &mut *self.inner.stream_wrapper; - raw.msg = ptr::null_mut(); - raw.next_in = input.as_ptr() as *mut _; - raw.avail_in = cmp::min(input.len(), c_uint::max_value() as usize) as c_uint; - raw.next_out = output.as_mut_ptr(); - raw.avail_out = cmp::min(output.len(), c_uint::max_value() as 
usize) as c_uint; - - let rc = unsafe { mz_deflate(raw, flush as c_int) }; - - // Unfortunately the total counters provided by zlib might be only - // 32 bits wide and overflow while processing large amounts of data. - self.inner.total_in += (raw.next_in as usize - input.as_ptr() as usize) as u64; - self.inner.total_out += (raw.next_out as usize - output.as_ptr() as usize) as u64; - - match rc { - MZ_OK => Ok(Status::Ok), - MZ_BUF_ERROR => Ok(Status::BufError), - MZ_STREAM_END => Ok(Status::StreamEnd), - MZ_STREAM_ERROR => mem::compress_failed(self.inner.msg()), - c => panic!("unknown return code: {}", c), - } - } - - fn reset(&mut self) { - self.inner.total_in = 0; - self.inner.total_out = 0; - let rc = unsafe { mz_deflateReset(&mut *self.inner.stream_wrapper) }; - assert_eq!(rc, MZ_OK); - } -} - -impl Backend for Deflate { - #[inline] - fn total_in(&self) -> u64 { - self.inner.total_in - } - - #[inline] - fn total_out(&self) -> u64 { - self.inner.total_out - } -} - -pub use self::c_backend::*; - -/// For backwards compatibility, we provide symbols as `mz_` to mimic the miniz API -#[allow(bad_style)] -mod c_backend { - use std::mem; - use std::os::raw::{c_char, c_int}; - - #[cfg(feature = "zlib-ng")] - use libz_ng_sys as libz; - - #[cfg(all(not(feature = "zlib-ng"), feature = "cloudflare_zlib"))] - use cloudflare_zlib_sys as libz; - - #[cfg(all(not(feature = "cloudflare_zlib"), not(feature = "zlib-ng")))] - use libz_sys as libz; - - pub use libz::deflate as mz_deflate; - pub use libz::deflateEnd as mz_deflateEnd; - pub use libz::deflateReset as mz_deflateReset; - pub use libz::inflate as mz_inflate; - pub use libz::inflateEnd as mz_inflateEnd; - pub use libz::z_stream as mz_stream; - pub use libz::*; - - pub use libz::Z_BLOCK as MZ_BLOCK; - pub use libz::Z_BUF_ERROR as MZ_BUF_ERROR; - pub use libz::Z_DATA_ERROR as MZ_DATA_ERROR; - pub use libz::Z_DEFAULT_STRATEGY as MZ_DEFAULT_STRATEGY; - pub use libz::Z_DEFLATED as MZ_DEFLATED; - pub use libz::Z_FINISH as 
MZ_FINISH; - pub use libz::Z_FULL_FLUSH as MZ_FULL_FLUSH; - pub use libz::Z_NEED_DICT as MZ_NEED_DICT; - pub use libz::Z_NO_FLUSH as MZ_NO_FLUSH; - pub use libz::Z_OK as MZ_OK; - pub use libz::Z_PARTIAL_FLUSH as MZ_PARTIAL_FLUSH; - pub use libz::Z_STREAM_END as MZ_STREAM_END; - pub use libz::Z_STREAM_ERROR as MZ_STREAM_ERROR; - pub use libz::Z_SYNC_FLUSH as MZ_SYNC_FLUSH; - pub type AllocSize = libz::uInt; - - pub const MZ_DEFAULT_WINDOW_BITS: c_int = 15; - - #[cfg(feature = "zlib-ng")] - const ZLIB_VERSION: &'static str = "2.1.0.devel\0"; - #[cfg(not(feature = "zlib-ng"))] - const ZLIB_VERSION: &'static str = "1.2.8\0"; - - pub unsafe extern "C" fn mz_deflateInit2( - stream: *mut mz_stream, - level: c_int, - method: c_int, - window_bits: c_int, - mem_level: c_int, - strategy: c_int, - ) -> c_int { - libz::deflateInit2_( - stream, - level, - method, - window_bits, - mem_level, - strategy, - ZLIB_VERSION.as_ptr() as *const c_char, - mem::size_of::() as c_int, - ) - } - pub unsafe extern "C" fn mz_inflateInit2(stream: *mut mz_stream, window_bits: c_int) -> c_int { - libz::inflateInit2_( - stream, - window_bits, - ZLIB_VERSION.as_ptr() as *const c_char, - mem::size_of::() as c_int, - ) - } -} diff --git a/third_party/rust/flate2/v1/crate/src/ffi/mod.rs b/third_party/rust/flate2/v1/crate/src/ffi/mod.rs deleted file mode 100644 index 8bac6e423223..000000000000 --- a/third_party/rust/flate2/v1/crate/src/ffi/mod.rs +++ /dev/null @@ -1,52 +0,0 @@ -//! This module contains backend-specific code. - -use crate::mem::{CompressError, DecompressError, FlushCompress, FlushDecompress, Status}; -use crate::Compression; - -/// Traits specifying the interface of the backends. -/// -/// Sync + Send are added as a condition to ensure they are available -/// for the frontend. 
-pub trait Backend: Sync + Send { - fn total_in(&self) -> u64; - fn total_out(&self) -> u64; -} - -pub trait InflateBackend: Backend { - fn make(zlib_header: bool, window_bits: u8) -> Self; - fn decompress( - &mut self, - input: &[u8], - output: &mut [u8], - flush: FlushDecompress, - ) -> Result; - fn reset(&mut self, zlib_header: bool); -} - -pub trait DeflateBackend: Backend { - fn make(level: Compression, zlib_header: bool, window_bits: u8) -> Self; - fn compress( - &mut self, - input: &[u8], - output: &mut [u8], - flush: FlushCompress, - ) -> Result; - fn reset(&mut self); -} - -// Default to Rust implementation unless explicitly opted in to a different backend. -#[cfg(feature = "any_zlib")] -mod c; -#[cfg(feature = "any_zlib")] -pub use self::c::*; - -#[cfg(not(feature = "any_zlib"))] -mod rust; -#[cfg(not(feature = "any_zlib"))] -pub use self::rust::*; - -impl std::fmt::Debug for ErrorMessage { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - self.get().fmt(f) - } -} diff --git a/third_party/rust/flate2/v1/crate/src/ffi/rust.rs b/third_party/rust/flate2/v1/crate/src/ffi/rust.rs deleted file mode 100644 index eadd6ec1873f..000000000000 --- a/third_party/rust/flate2/v1/crate/src/ffi/rust.rs +++ /dev/null @@ -1,183 +0,0 @@ -//! Implementation for miniz_oxide rust backend. - -use std::convert::TryInto; -use std::fmt; - -use miniz_oxide::deflate::core::CompressorOxide; -use miniz_oxide::inflate::stream::InflateState; -pub use miniz_oxide::*; - -pub const MZ_NO_FLUSH: isize = MZFlush::None as isize; -pub const MZ_PARTIAL_FLUSH: isize = MZFlush::Partial as isize; -pub const MZ_SYNC_FLUSH: isize = MZFlush::Sync as isize; -pub const MZ_FULL_FLUSH: isize = MZFlush::Full as isize; -pub const MZ_FINISH: isize = MZFlush::Finish as isize; - -use super::*; -use crate::mem; - -// miniz_oxide doesn't provide any error messages (yet?) 
-#[derive(Default)] -pub struct ErrorMessage; - -impl ErrorMessage { - pub fn get(&self) -> Option<&str> { - None - } -} - -fn format_from_bool(zlib_header: bool) -> DataFormat { - if zlib_header { - DataFormat::Zlib - } else { - DataFormat::Raw - } -} - -pub struct Inflate { - inner: Box, - total_in: u64, - total_out: u64, -} - -impl fmt::Debug for Inflate { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - write!( - f, - "miniz_oxide inflate internal state. total_in: {}, total_out: {}", - self.total_in, self.total_out, - ) - } -} - -impl InflateBackend for Inflate { - fn make(zlib_header: bool, _window_bits: u8) -> Self { - let format = format_from_bool(zlib_header); - - Inflate { - inner: InflateState::new_boxed(format), - total_in: 0, - total_out: 0, - } - } - - fn decompress( - &mut self, - input: &[u8], - output: &mut [u8], - flush: FlushDecompress, - ) -> Result { - let flush = MZFlush::new(flush as i32).unwrap(); - - let res = inflate::stream::inflate(&mut self.inner, input, output, flush); - self.total_in += res.bytes_consumed as u64; - self.total_out += res.bytes_written as u64; - - match res.status { - Ok(status) => match status { - MZStatus::Ok => Ok(Status::Ok), - MZStatus::StreamEnd => Ok(Status::StreamEnd), - MZStatus::NeedDict => { - mem::decompress_need_dict(self.inner.decompressor().adler32().unwrap_or(0)) - } - }, - Err(status) => match status { - MZError::Buf => Ok(Status::BufError), - _ => mem::decompress_failed(ErrorMessage), - }, - } - } - - fn reset(&mut self, zlib_header: bool) { - self.inner.reset(format_from_bool(zlib_header)); - self.total_in = 0; - self.total_out = 0; - } -} - -impl Backend for Inflate { - #[inline] - fn total_in(&self) -> u64 { - self.total_in - } - - #[inline] - fn total_out(&self) -> u64 { - self.total_out - } -} - -pub struct Deflate { - inner: Box, - total_in: u64, - total_out: u64, -} - -impl fmt::Debug for Deflate { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - write!( 
- f, - "miniz_oxide deflate internal state. total_in: {}, total_out: {}", - self.total_in, self.total_out, - ) - } -} - -impl DeflateBackend for Deflate { - fn make(level: Compression, zlib_header: bool, _window_bits: u8) -> Self { - // Check in case the integer value changes at some point. - debug_assert!(level.level() <= 10); - - let mut inner: Box = Box::default(); - let format = format_from_bool(zlib_header); - inner.set_format_and_level(format, level.level().try_into().unwrap_or(1)); - - Deflate { - inner, - total_in: 0, - total_out: 0, - } - } - - fn compress( - &mut self, - input: &[u8], - output: &mut [u8], - flush: FlushCompress, - ) -> Result { - let flush = MZFlush::new(flush as i32).unwrap(); - let res = deflate::stream::deflate(&mut self.inner, input, output, flush); - self.total_in += res.bytes_consumed as u64; - self.total_out += res.bytes_written as u64; - - match res.status { - Ok(status) => match status { - MZStatus::Ok => Ok(Status::Ok), - MZStatus::StreamEnd => Ok(Status::StreamEnd), - MZStatus::NeedDict => mem::compress_failed(ErrorMessage), - }, - Err(status) => match status { - MZError::Buf => Ok(Status::BufError), - _ => mem::compress_failed(ErrorMessage), - }, - } - } - - fn reset(&mut self) { - self.total_in = 0; - self.total_out = 0; - self.inner.reset(); - } -} - -impl Backend for Deflate { - #[inline] - fn total_in(&self) -> u64 { - self.total_in - } - - #[inline] - fn total_out(&self) -> u64 { - self.total_out - } -} diff --git a/third_party/rust/flate2/v1/crate/src/gz/bufread.rs b/third_party/rust/flate2/v1/crate/src/gz/bufread.rs deleted file mode 100644 index 6be144d0c842..000000000000 --- a/third_party/rust/flate2/v1/crate/src/gz/bufread.rs +++ /dev/null @@ -1,852 +0,0 @@ -use std::cmp; -use std::io; -use std::io::prelude::*; -use std::mem; - -use super::{GzBuilder, GzHeader}; -use super::{FCOMMENT, FEXTRA, FHCRC, FNAME}; -use crate::crc::{Crc, CrcReader}; -use crate::deflate; -use crate::Compression; - -fn copy(into: &mut [u8], 
from: &[u8], pos: &mut usize) -> usize { - let min = cmp::min(into.len(), from.len() - *pos); - for (slot, val) in into.iter_mut().zip(from[*pos..*pos + min].iter()) { - *slot = *val; - } - *pos += min; - min -} - -pub(crate) fn corrupt() -> io::Error { - io::Error::new( - io::ErrorKind::InvalidInput, - "corrupt gzip stream does not have a matching checksum", - ) -} - -fn bad_header() -> io::Error { - io::Error::new(io::ErrorKind::InvalidInput, "invalid gzip header") -} - -fn read_le_u16(r: &mut Buffer) -> io::Result { - let mut b = [0; 2]; - r.read_and_forget(&mut b)?; - Ok((b[0] as u16) | ((b[1] as u16) << 8)) -} - -fn read_gz_header_part<'a, R: Read>(r: &'a mut Buffer<'a, R>) -> io::Result<()> { - loop { - match r.part.state { - GzHeaderParsingState::Start => { - let mut header = [0; 10]; - r.read_and_forget(&mut header)?; - - if header[0] != 0x1f || header[1] != 0x8b { - return Err(bad_header()); - } - if header[2] != 8 { - return Err(bad_header()); - } - - r.part.flg = header[3]; - r.part.header.mtime = ((header[4] as u32) << 0) - | ((header[5] as u32) << 8) - | ((header[6] as u32) << 16) - | ((header[7] as u32) << 24); - let _xfl = header[8]; - r.part.header.operating_system = header[9]; - r.part.state = GzHeaderParsingState::Xlen; - } - GzHeaderParsingState::Xlen => { - if r.part.flg & FEXTRA != 0 { - r.part.xlen = read_le_u16(r)?; - } - r.part.state = GzHeaderParsingState::Extra; - } - GzHeaderParsingState::Extra => { - if r.part.flg & FEXTRA != 0 { - let mut extra = vec![0; r.part.xlen as usize]; - r.read_and_forget(&mut extra)?; - r.part.header.extra = Some(extra); - } - r.part.state = GzHeaderParsingState::Filename; - } - GzHeaderParsingState::Filename => { - if r.part.flg & FNAME != 0 { - if None == r.part.header.filename { - r.part.header.filename = Some(Vec::new()); - }; - for byte in r.bytes() { - let byte = byte?; - if byte == 0 { - break; - } - } - } - r.part.state = GzHeaderParsingState::Comment; - } - GzHeaderParsingState::Comment => { - if 
r.part.flg & FCOMMENT != 0 { - if None == r.part.header.comment { - r.part.header.comment = Some(Vec::new()); - }; - for byte in r.bytes() { - let byte = byte?; - if byte == 0 { - break; - } - } - } - r.part.state = GzHeaderParsingState::Crc; - } - GzHeaderParsingState::Crc => { - if r.part.flg & FHCRC != 0 { - let stored_crc = read_le_u16(r)?; - let calced_crc = r.part.crc.sum() as u16; - if stored_crc != calced_crc { - return Err(corrupt()); - } - } - return Ok(()); - } - } - } -} - -pub(crate) fn read_gz_header(r: &mut R) -> io::Result { - let mut part = GzHeaderPartial::new(); - - let result = { - let mut reader = Buffer::new(&mut part, r); - read_gz_header_part(&mut reader) - }; - result.map(|()| part.take_header()) -} - -/// A gzip streaming encoder -/// -/// This structure exposes a [`BufRead`] interface that will read uncompressed data -/// from the underlying reader and expose the compressed version as a [`BufRead`] -/// interface. -/// -/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html -/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// use std::io; -/// use flate2::Compression; -/// use flate2::bufread::GzEncoder; -/// use std::fs::File; -/// use std::io::BufReader; -/// -/// // Opens sample file, compresses the contents and returns a Vector or error -/// // File wrapped in a BufReader implements BufRead -/// -/// fn open_hello_world() -> io::Result> { -/// let f = File::open("examples/hello_world.txt")?; -/// let b = BufReader::new(f); -/// let mut gz = GzEncoder::new(b, Compression::fast()); -/// let mut buffer = Vec::new(); -/// gz.read_to_end(&mut buffer)?; -/// Ok(buffer) -/// } -/// ``` -#[derive(Debug)] -pub struct GzEncoder { - inner: deflate::bufread::DeflateEncoder>, - header: Vec, - pos: usize, - eof: bool, -} - -pub fn gz_encoder(header: Vec, r: R, lvl: Compression) -> GzEncoder { - let crc = CrcReader::new(r); - GzEncoder { - inner: deflate::bufread::DeflateEncoder::new(crc, lvl), - header, - pos: 0, - 
eof: false, - } -} - -impl GzEncoder { - /// Creates a new encoder which will use the given compression level. - /// - /// The encoder is not configured specially for the emitted header. For - /// header configuration, see the `GzBuilder` type. - /// - /// The data read from the stream `r` will be compressed and available - /// through the returned reader. - pub fn new(r: R, level: Compression) -> GzEncoder { - GzBuilder::new().buf_read(r, level) - } - - fn read_footer(&mut self, into: &mut [u8]) -> io::Result { - if self.pos == 8 { - return Ok(0); - } - let crc = self.inner.get_ref().crc(); - let ref arr = [ - (crc.sum() >> 0) as u8, - (crc.sum() >> 8) as u8, - (crc.sum() >> 16) as u8, - (crc.sum() >> 24) as u8, - (crc.amount() >> 0) as u8, - (crc.amount() >> 8) as u8, - (crc.amount() >> 16) as u8, - (crc.amount() >> 24) as u8, - ]; - Ok(copy(into, arr, &mut self.pos)) - } -} - -impl GzEncoder { - /// Acquires a reference to the underlying reader. - pub fn get_ref(&self) -> &R { - self.inner.get_ref().get_ref() - } - - /// Acquires a mutable reference to the underlying reader. - /// - /// Note that mutation of the reader may result in surprising results if - /// this encoder is continued to be used. 
- pub fn get_mut(&mut self) -> &mut R { - self.inner.get_mut().get_mut() - } - - /// Returns the underlying stream, consuming this encoder - pub fn into_inner(self) -> R { - self.inner.into_inner().into_inner() - } -} - -#[inline] -fn finish(buf: &[u8; 8]) -> (u32, u32) { - let crc = ((buf[0] as u32) << 0) - | ((buf[1] as u32) << 8) - | ((buf[2] as u32) << 16) - | ((buf[3] as u32) << 24); - let amt = ((buf[4] as u32) << 0) - | ((buf[5] as u32) << 8) - | ((buf[6] as u32) << 16) - | ((buf[7] as u32) << 24); - (crc, amt) -} - -impl Read for GzEncoder { - fn read(&mut self, mut into: &mut [u8]) -> io::Result { - let mut amt = 0; - if self.eof { - return self.read_footer(into); - } else if self.pos < self.header.len() { - amt += copy(into, &self.header, &mut self.pos); - if amt == into.len() { - return Ok(amt); - } - let tmp = into; - into = &mut tmp[amt..]; - } - match self.inner.read(into)? { - 0 => { - self.eof = true; - self.pos = 0; - self.read_footer(into) - } - n => Ok(amt + n), - } - } -} - -impl Write for GzEncoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.get_mut().write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.get_mut().flush() - } -} - -/// A gzip streaming decoder -/// -/// This structure consumes a [`BufRead`] interface, reading compressed data -/// from the underlying reader, and emitting uncompressed data. 
-/// -/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html -/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// use std::io; -/// # use flate2::Compression; -/// # use flate2::write::GzEncoder; -/// use flate2::bufread::GzDecoder; -/// -/// # fn main() { -/// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); -/// # e.write_all(b"Hello World").unwrap(); -/// # let bytes = e.finish().unwrap(); -/// # println!("{}", decode_reader(bytes).unwrap()); -/// # } -/// # -/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error -/// // Here &[u8] implements BufRead -/// -/// fn decode_reader(bytes: Vec) -> io::Result { -/// let mut gz = GzDecoder::new(&bytes[..]); -/// let mut s = String::new(); -/// gz.read_to_string(&mut s)?; -/// Ok(s) -/// } -/// ``` -#[derive(Debug)] -pub struct GzDecoder { - inner: GzState, - header: Option, - reader: CrcReader>, - multi: bool, -} - -#[derive(Debug)] -pub enum GzHeaderParsingState { - Start, - Xlen, - Extra, - Filename, - Comment, - Crc, -} - -#[derive(Debug)] -pub struct GzHeaderPartial { - buf: Vec, - state: GzHeaderParsingState, - flg: u8, - xlen: u16, - crc: Crc, - header: GzHeader, -} - -impl GzHeaderPartial { - fn new() -> GzHeaderPartial { - GzHeaderPartial { - buf: Vec::with_capacity(10), // minimum header length - state: GzHeaderParsingState::Start, - flg: 0, - xlen: 0, - crc: Crc::new(), - header: GzHeader { - extra: None, - filename: None, - comment: None, - operating_system: 0, - mtime: 0, - }, - } - } - - pub fn take_header(self) -> GzHeader { - self.header - } -} - -#[derive(Debug)] -enum GzState { - Header(GzHeaderPartial), - Body, - Finished(usize, [u8; 8]), - Err(io::Error), - End, -} - -/// A small adapter which reads data originally from `buf` and then reads all -/// further data from `reader`. This will also buffer all data read from -/// `reader` into `buf` for reuse on a further call. 
-struct Buffer<'a, T: 'a> { - part: &'a mut GzHeaderPartial, - buf_cur: usize, - buf_max: usize, - reader: &'a mut T, -} - -impl<'a, T> Buffer<'a, T> { - fn new(part: &'a mut GzHeaderPartial, reader: &'a mut T) -> Buffer<'a, T> { - Buffer { - reader, - buf_cur: 0, - buf_max: part.buf.len(), - part, - } - } -} - -impl<'a, T: Read> Read for Buffer<'a, T> { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - let mut bufref = match self.part.state { - GzHeaderParsingState::Filename => self.part.header.filename.as_mut(), - GzHeaderParsingState::Comment => self.part.header.comment.as_mut(), - _ => None, - }; - if let Some(ref mut b) = bufref { - // we have a direct reference to a buffer where to write - let len = self.reader.read(buf)?; - if len > 0 && buf[len - 1] == 0 { - // we do not append the final 0 - b.extend_from_slice(&buf[..len - 1]); - } else { - b.extend_from_slice(&buf[..len]); - } - self.part.crc.update(&buf[..len]); - Ok(len) - } else if self.buf_cur == self.buf_max { - // we read new bytes and also save them in self.part.buf - let len = self.reader.read(buf)?; - self.part.buf.extend_from_slice(&buf[..len]); - self.part.crc.update(&buf[..len]); - Ok(len) - } else { - // we first read the previously saved bytes - let len = (&self.part.buf[self.buf_cur..self.buf_max]).read(buf)?; - self.buf_cur += len; - Ok(len) - } - } -} - -impl<'a, T> Buffer<'a, T> -where - T: std::io::Read, -{ - // If we manage to read all the bytes, we reset the buffer - fn read_and_forget(&mut self, buf: &mut [u8]) -> io::Result { - self.read_exact(buf)?; - // we managed to read the whole buf - // we will no longer need the previously saved bytes in self.part.buf - let rlen = buf.len(); - self.part.buf.truncate(0); - self.buf_cur = 0; - self.buf_max = 0; - Ok(rlen) - } -} - -impl GzDecoder { - /// Creates a new decoder from the given reader, immediately parsing the - /// gzip header. 
- pub fn new(mut r: R) -> GzDecoder { - let mut part = GzHeaderPartial::new(); - let mut header = None; - - let result = { - let mut reader = Buffer::new(&mut part, &mut r); - read_gz_header_part(&mut reader) - }; - - let state = match result { - Ok(()) => { - header = Some(part.take_header()); - GzState::Body - } - Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => GzState::Header(part), - Err(err) => GzState::Err(err), - }; - - GzDecoder { - inner: state, - reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)), - multi: false, - header, - } - } - - fn multi(mut self, flag: bool) -> GzDecoder { - self.multi = flag; - self - } -} - -impl GzDecoder { - /// Returns the header associated with this stream, if it was valid - pub fn header(&self) -> Option<&GzHeader> { - self.header.as_ref() - } - - /// Acquires a reference to the underlying reader. - pub fn get_ref(&self) -> &R { - self.reader.get_ref().get_ref() - } - - /// Acquires a mutable reference to the underlying stream. - /// - /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. - pub fn get_mut(&mut self) -> &mut R { - self.reader.get_mut().get_mut() - } - - /// Consumes this decoder, returning the underlying reader. 
- pub fn into_inner(self) -> R { - self.reader.into_inner().into_inner() - } -} - -impl Read for GzDecoder { - fn read(&mut self, into: &mut [u8]) -> io::Result { - let GzDecoder { - inner, - header, - reader, - multi, - } = self; - - loop { - *inner = match mem::replace(inner, GzState::End) { - GzState::Header(mut part) => { - let result = { - let mut reader = Buffer::new(&mut part, reader.get_mut().get_mut()); - read_gz_header_part(&mut reader) - }; - match result { - Ok(()) => { - *header = Some(part.take_header()); - GzState::Body - } - Err(err) if io::ErrorKind::WouldBlock == err.kind() => { - *inner = GzState::Header(part); - return Err(err); - } - Err(err) => return Err(err), - } - } - GzState::Body => { - if into.is_empty() { - *inner = GzState::Body; - return Ok(0); - } - - let n = reader.read(into).map_err(|err| { - if io::ErrorKind::WouldBlock == err.kind() { - *inner = GzState::Body; - } - - err - })?; - - match n { - 0 => GzState::Finished(0, [0; 8]), - n => { - *inner = GzState::Body; - return Ok(n); - } - } - } - GzState::Finished(pos, mut buf) => { - if pos < buf.len() { - let n = reader - .get_mut() - .get_mut() - .read(&mut buf[pos..]) - .and_then(|n| { - if n == 0 { - Err(io::ErrorKind::UnexpectedEof.into()) - } else { - Ok(n) - } - }) - .map_err(|err| { - if io::ErrorKind::WouldBlock == err.kind() { - *inner = GzState::Finished(pos, buf); - } - - err - })?; - - GzState::Finished(pos + n, buf) - } else { - let (crc, amt) = finish(&buf); - - if crc != reader.crc().sum() || amt != reader.crc().amount() { - return Err(corrupt()); - } else if *multi { - let is_eof = reader - .get_mut() - .get_mut() - .fill_buf() - .map(|buf| buf.is_empty()) - .map_err(|err| { - if io::ErrorKind::WouldBlock == err.kind() { - *inner = GzState::Finished(pos, buf); - } - - err - })?; - - if is_eof { - GzState::End - } else { - reader.reset(); - reader.get_mut().reset_data(); - header.take(); - GzState::Header(GzHeaderPartial::new()) - } - } else { - GzState::End - } - } 
- } - GzState::Err(err) => return Err(err), - GzState::End => return Ok(0), - }; - } - } -} - -impl Write for GzDecoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.get_mut().write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.get_mut().flush() - } -} - -/// A gzip streaming decoder that decodes all members of a multistream -/// -/// A gzip member consists of a header, compressed data and a trailer. The [gzip -/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple -/// gzip members to be joined in a single stream. `MultiGzDecoder` will -/// decode all consecutive members while `GzDecoder` will only decompress -/// the first gzip member. The multistream format is commonly used in -/// bioinformatics, for example when using the BGZF compressed data. -/// -/// This structure exposes a [`BufRead`] interface that will consume all gzip members -/// from the underlying reader and emit uncompressed data. -/// -/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html -/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// use std::io; -/// # use flate2::Compression; -/// # use flate2::write::GzEncoder; -/// use flate2::bufread::MultiGzDecoder; -/// -/// # fn main() { -/// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); -/// # e.write_all(b"Hello World").unwrap(); -/// # let bytes = e.finish().unwrap(); -/// # println!("{}", decode_reader(bytes).unwrap()); -/// # } -/// # -/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error -/// // Here &[u8] implements BufRead -/// -/// fn decode_reader(bytes: Vec) -> io::Result { -/// let mut gz = MultiGzDecoder::new(&bytes[..]); -/// let mut s = String::new(); -/// gz.read_to_string(&mut s)?; -/// Ok(s) -/// } -/// ``` -#[derive(Debug)] -pub struct MultiGzDecoder(GzDecoder); - -impl MultiGzDecoder { - /// Creates a new decoder from the given reader, immediately parsing the - /// (first) gzip header. 
If the gzip stream contains multiple members all will - /// be decoded. - pub fn new(r: R) -> MultiGzDecoder { - MultiGzDecoder(GzDecoder::new(r).multi(true)) - } -} - -impl MultiGzDecoder { - /// Returns the current header associated with this stream, if it's valid - pub fn header(&self) -> Option<&GzHeader> { - self.0.header() - } - - /// Acquires a reference to the underlying reader. - pub fn get_ref(&self) -> &R { - self.0.get_ref() - } - - /// Acquires a mutable reference to the underlying stream. - /// - /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. - pub fn get_mut(&mut self) -> &mut R { - self.0.get_mut() - } - - /// Consumes this decoder, returning the underlying reader. - pub fn into_inner(self) -> R { - self.0.into_inner() - } -} - -impl Read for MultiGzDecoder { - fn read(&mut self, into: &mut [u8]) -> io::Result { - self.0.read(into) - } -} - -#[cfg(test)] -pub mod tests { - use crate::gz::bufread::*; - use std::io; - use std::io::{Cursor, Read, Write}; - - //a cursor turning EOF into blocking errors - #[derive(Debug)] - pub struct BlockingCursor { - pub cursor: Cursor>, - } - - impl BlockingCursor { - pub fn new() -> BlockingCursor { - BlockingCursor { - cursor: Cursor::new(Vec::new()), - } - } - - pub fn set_position(&mut self, pos: u64) { - return self.cursor.set_position(pos); - } - - pub fn position(&mut self) -> u64 { - return self.cursor.position(); - } - } - - impl Write for BlockingCursor { - fn write(&mut self, buf: &[u8]) -> io::Result { - return self.cursor.write(buf); - } - fn flush(&mut self) -> io::Result<()> { - return self.cursor.flush(); - } - } - - impl Read for BlockingCursor { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - //use the cursor, except it turns eof into blocking error - let r = self.cursor.read(buf); - match r { - Err(ref err) => { - if err.kind() == io::ErrorKind::UnexpectedEof { - return Err(io::ErrorKind::WouldBlock.into()); - } - } - 
Ok(0) => { - //regular EOF turned into blocking error - return Err(io::ErrorKind::WouldBlock.into()); - } - Ok(_n) => {} - } - return r; - } - } - #[test] - // test function read_and_forget of Buffer - fn buffer_read_and_forget() { - // this is unused except for the buffering - let mut part = GzHeaderPartial::new(); - // this is a reader which receives data afterwards - let mut r = BlockingCursor::new(); - let data = vec![1, 2, 3]; - let mut out = Vec::with_capacity(7); - - match r.write_all(&data) { - Ok(()) => {} - _ => { - panic!("Unexpected result for write_all"); - } - } - r.set_position(0); - - // First read : successful for one byte - let mut reader = Buffer::new(&mut part, &mut r); - out.resize(1, 0); - match reader.read_and_forget(&mut out) { - Ok(1) => {} - _ => { - panic!("Unexpected result for read_and_forget with data"); - } - } - - // Second read : incomplete for 7 bytes (we have only 2) - out.resize(7, 0); - match reader.read_and_forget(&mut out) { - Err(ref err) => { - assert_eq!(io::ErrorKind::WouldBlock, err.kind()); - } - _ => { - panic!("Unexpected result for read_and_forget with incomplete"); - } - } - - // 3 more data bytes have arrived - let pos = r.position(); - let data2 = vec![4, 5, 6]; - match r.write_all(&data2) { - Ok(()) => {} - _ => { - panic!("Unexpected result for write_all"); - } - } - r.set_position(pos); - - // Third read : still incomplete for 7 bytes (we have 5) - let mut reader2 = Buffer::new(&mut part, &mut r); - match reader2.read_and_forget(&mut out) { - Err(ref err) => { - assert_eq!(io::ErrorKind::WouldBlock, err.kind()); - } - _ => { - panic!("Unexpected result for read_and_forget with more incomplete"); - } - } - - // 3 more data bytes have arrived again - let pos2 = r.position(); - let data3 = vec![7, 8, 9]; - match r.write_all(&data3) { - Ok(()) => {} - _ => { - panic!("Unexpected result for write_all"); - } - } - r.set_position(pos2); - - // Fourth read : now successful for 7 bytes - let mut reader3 = 
Buffer::new(&mut part, &mut r); - match reader3.read_and_forget(&mut out) { - Ok(7) => { - assert_eq!(out[0], 2); - assert_eq!(out[6], 8); - } - _ => { - panic!("Unexpected result for read_and_forget with data"); - } - } - - // Fifth read : successful for one more byte - out.resize(1, 0); - match reader3.read_and_forget(&mut out) { - Ok(1) => { - assert_eq!(out[0], 9); - } - _ => { - panic!("Unexpected result for read_and_forget with data"); - } - } - } -} diff --git a/third_party/rust/flate2/v1/crate/src/gz/mod.rs b/third_party/rust/flate2/v1/crate/src/gz/mod.rs deleted file mode 100644 index 505450e3e972..000000000000 --- a/third_party/rust/flate2/v1/crate/src/gz/mod.rs +++ /dev/null @@ -1,385 +0,0 @@ -use std::ffi::CString; -use std::io::prelude::*; -use std::time; - -use crate::bufreader::BufReader; -use crate::Compression; - -pub static FHCRC: u8 = 1 << 1; -pub static FEXTRA: u8 = 1 << 2; -pub static FNAME: u8 = 1 << 3; -pub static FCOMMENT: u8 = 1 << 4; - -pub mod bufread; -pub mod read; -pub mod write; - -/// A structure representing the header of a gzip stream. -/// -/// The header can contain metadata about the file that was compressed, if -/// present. -#[derive(PartialEq, Clone, Debug, Default)] -pub struct GzHeader { - extra: Option>, - filename: Option>, - comment: Option>, - operating_system: u8, - mtime: u32, -} - -impl GzHeader { - /// Returns the `filename` field of this gzip stream's header, if present. - pub fn filename(&self) -> Option<&[u8]> { - self.filename.as_ref().map(|s| &s[..]) - } - - /// Returns the `extra` field of this gzip stream's header, if present. - pub fn extra(&self) -> Option<&[u8]> { - self.extra.as_ref().map(|s| &s[..]) - } - - /// Returns the `comment` field of this gzip stream's header, if present. - pub fn comment(&self) -> Option<&[u8]> { - self.comment.as_ref().map(|s| &s[..]) - } - - /// Returns the `operating_system` field of this gzip stream's header. 
- /// - /// There are predefined values for various operating systems. - /// 255 means that the value is unknown. - pub fn operating_system(&self) -> u8 { - self.operating_system - } - - /// This gives the most recent modification time of the original file being compressed. - /// - /// The time is in Unix format, i.e., seconds since 00:00:00 GMT, Jan. 1, 1970. - /// (Note that this may cause problems for MS-DOS and other systems that use local - /// rather than Universal time.) If the compressed data did not come from a file, - /// `mtime` is set to the time at which compression started. - /// `mtime` = 0 means no time stamp is available. - /// - /// The usage of `mtime` is discouraged because of Year 2038 problem. - pub fn mtime(&self) -> u32 { - self.mtime - } - - /// Returns the most recent modification time represented by a date-time type. - /// Returns `None` if the value of the underlying counter is 0, - /// indicating no time stamp is available. - /// - /// - /// The time is measured as seconds since 00:00:00 GMT, Jan. 1 1970. - /// See [`mtime`](#method.mtime) for more detail. - pub fn mtime_as_datetime(&self) -> Option { - if self.mtime == 0 { - None - } else { - let duration = time::Duration::new(u64::from(self.mtime), 0); - let datetime = time::UNIX_EPOCH + duration; - Some(datetime) - } - } -} - -/// A builder structure to create a new gzip Encoder. -/// -/// This structure controls header configuration options such as the filename. 
-/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// # use std::io; -/// use std::fs::File; -/// use flate2::GzBuilder; -/// use flate2::Compression; -/// -/// // GzBuilder opens a file and writes a sample string using GzBuilder pattern -/// -/// # fn sample_builder() -> Result<(), io::Error> { -/// let f = File::create("examples/hello_world.gz")?; -/// let mut gz = GzBuilder::new() -/// .filename("hello_world.txt") -/// .comment("test file, please delete") -/// .write(f, Compression::default()); -/// gz.write_all(b"hello world")?; -/// gz.finish()?; -/// # Ok(()) -/// # } -/// ``` -#[derive(Debug)] -pub struct GzBuilder { - extra: Option>, - filename: Option, - comment: Option, - operating_system: Option, - mtime: u32, -} - -impl Default for GzBuilder { - fn default() -> Self { - Self::new() - } -} - -impl GzBuilder { - /// Create a new blank builder with no header by default. - pub fn new() -> GzBuilder { - GzBuilder { - extra: None, - filename: None, - comment: None, - operating_system: None, - mtime: 0, - } - } - - /// Configure the `mtime` field in the gzip header. - pub fn mtime(mut self, mtime: u32) -> GzBuilder { - self.mtime = mtime; - self - } - - /// Configure the `operating_system` field in the gzip header. - pub fn operating_system(mut self, os: u8) -> GzBuilder { - self.operating_system = Some(os); - self - } - - /// Configure the `extra` field in the gzip header. - pub fn extra>>(mut self, extra: T) -> GzBuilder { - self.extra = Some(extra.into()); - self - } - - /// Configure the `filename` field in the gzip header. - /// - /// # Panics - /// - /// Panics if the `filename` slice contains a zero. - pub fn filename>>(mut self, filename: T) -> GzBuilder { - self.filename = Some(CString::new(filename.into()).unwrap()); - self - } - - /// Configure the `comment` field in the gzip header. - /// - /// # Panics - /// - /// Panics if the `comment` slice contains a zero. 
- pub fn comment>>(mut self, comment: T) -> GzBuilder { - self.comment = Some(CString::new(comment.into()).unwrap()); - self - } - - /// Consume this builder, creating a writer encoder in the process. - /// - /// The data written to the returned encoder will be compressed and then - /// written out to the supplied parameter `w`. - pub fn write(self, w: W, lvl: Compression) -> write::GzEncoder { - write::gz_encoder(self.into_header(lvl), w, lvl) - } - - /// Consume this builder, creating a reader encoder in the process. - /// - /// Data read from the returned encoder will be the compressed version of - /// the data read from the given reader. - pub fn read(self, r: R, lvl: Compression) -> read::GzEncoder { - read::gz_encoder(self.buf_read(BufReader::new(r), lvl)) - } - - /// Consume this builder, creating a reader encoder in the process. - /// - /// Data read from the returned encoder will be the compressed version of - /// the data read from the given reader. - pub fn buf_read(self, r: R, lvl: Compression) -> bufread::GzEncoder - where - R: BufRead, - { - bufread::gz_encoder(self.into_header(lvl), r, lvl) - } - - fn into_header(self, lvl: Compression) -> Vec { - let GzBuilder { - extra, - filename, - comment, - operating_system, - mtime, - } = self; - let mut flg = 0; - let mut header = vec![0u8; 10]; - if let Some(v) = extra { - flg |= FEXTRA; - header.push((v.len() >> 0) as u8); - header.push((v.len() >> 8) as u8); - header.extend(v); - } - if let Some(filename) = filename { - flg |= FNAME; - header.extend(filename.as_bytes_with_nul().iter().map(|x| *x)); - } - if let Some(comment) = comment { - flg |= FCOMMENT; - header.extend(comment.as_bytes_with_nul().iter().map(|x| *x)); - } - header[0] = 0x1f; - header[1] = 0x8b; - header[2] = 8; - header[3] = flg; - header[4] = (mtime >> 0) as u8; - header[5] = (mtime >> 8) as u8; - header[6] = (mtime >> 16) as u8; - header[7] = (mtime >> 24) as u8; - header[8] = if lvl.0 >= Compression::best().0 { - 2 - } else if lvl.0 <= 
Compression::fast().0 { - 4 - } else { - 0 - }; - - // Typically this byte indicates what OS the gz stream was created on, - // but in an effort to have cross-platform reproducible streams just - // default this value to 255. I'm not sure that if we "correctly" set - // this it'd do anything anyway... - header[9] = operating_system.unwrap_or(255); - header - } -} - -#[cfg(test)] -mod tests { - use std::io::prelude::*; - - use super::{read, write, GzBuilder}; - use crate::Compression; - use rand::{thread_rng, Rng}; - - #[test] - fn roundtrip() { - let mut e = write::GzEncoder::new(Vec::new(), Compression::default()); - e.write_all(b"foo bar baz").unwrap(); - let inner = e.finish().unwrap(); - let mut d = read::GzDecoder::new(&inner[..]); - let mut s = String::new(); - d.read_to_string(&mut s).unwrap(); - assert_eq!(s, "foo bar baz"); - } - - #[test] - fn roundtrip_zero() { - let e = write::GzEncoder::new(Vec::new(), Compression::default()); - let inner = e.finish().unwrap(); - let mut d = read::GzDecoder::new(&inner[..]); - let mut s = String::new(); - d.read_to_string(&mut s).unwrap(); - assert_eq!(s, ""); - } - - #[test] - fn roundtrip_big() { - let mut real = Vec::new(); - let mut w = write::GzEncoder::new(Vec::new(), Compression::default()); - let v = crate::random_bytes().take(1024).collect::>(); - for _ in 0..200 { - let to_write = &v[..thread_rng().gen_range(0..v.len())]; - real.extend(to_write.iter().map(|x| *x)); - w.write_all(to_write).unwrap(); - } - let result = w.finish().unwrap(); - let mut r = read::GzDecoder::new(&result[..]); - let mut v = Vec::new(); - r.read_to_end(&mut v).unwrap(); - assert!(v == real); - } - - #[test] - fn roundtrip_big2() { - let v = crate::random_bytes().take(1024 * 1024).collect::>(); - let mut r = read::GzDecoder::new(read::GzEncoder::new(&v[..], Compression::default())); - let mut res = Vec::new(); - r.read_to_end(&mut res).unwrap(); - assert!(res == v); - } - - #[test] - fn fields() { - let r = vec![0, 2, 4, 6]; - let e = 
GzBuilder::new() - .filename("foo.rs") - .comment("bar") - .extra(vec![0, 1, 2, 3]) - .read(&r[..], Compression::default()); - let mut d = read::GzDecoder::new(e); - assert_eq!(d.header().unwrap().filename(), Some(&b"foo.rs"[..])); - assert_eq!(d.header().unwrap().comment(), Some(&b"bar"[..])); - assert_eq!(d.header().unwrap().extra(), Some(&b"\x00\x01\x02\x03"[..])); - let mut res = Vec::new(); - d.read_to_end(&mut res).unwrap(); - assert_eq!(res, vec![0, 2, 4, 6]); - } - - #[test] - fn keep_reading_after_end() { - let mut e = write::GzEncoder::new(Vec::new(), Compression::default()); - e.write_all(b"foo bar baz").unwrap(); - let inner = e.finish().unwrap(); - let mut d = read::GzDecoder::new(&inner[..]); - let mut s = String::new(); - d.read_to_string(&mut s).unwrap(); - assert_eq!(s, "foo bar baz"); - d.read_to_string(&mut s).unwrap(); - assert_eq!(s, "foo bar baz"); - } - - #[test] - fn qc_reader() { - ::quickcheck::quickcheck(test as fn(_) -> _); - - fn test(v: Vec) -> bool { - let r = read::GzEncoder::new(&v[..], Compression::default()); - let mut r = read::GzDecoder::new(r); - let mut v2 = Vec::new(); - r.read_to_end(&mut v2).unwrap(); - v == v2 - } - } - - #[test] - fn flush_after_write() { - let mut f = write::GzEncoder::new(Vec::new(), Compression::default()); - write!(f, "Hello world").unwrap(); - f.flush().unwrap(); - } - - use crate::gz::bufread::tests::BlockingCursor; - #[test] - // test function read_and_forget of Buffer - fn blocked_partial_header_read() { - // this is a reader which receives data afterwards - let mut r = BlockingCursor::new(); - let data = vec![1, 2, 3]; - - match r.write_all(&data) { - Ok(()) => {} - _ => { - panic!("Unexpected result for write_all"); - } - } - r.set_position(0); - - // this is unused except for the buffering - let mut decoder = read::GzDecoder::new(r); - let mut out = Vec::with_capacity(7); - match decoder.read(&mut out) { - Err(e) => { - assert_eq!(e.kind(), std::io::ErrorKind::WouldBlock); - } - _ => { - 
panic!("Unexpected result for decoder.read"); - } - } - } -} diff --git a/third_party/rust/flate2/v1/crate/src/gz/read.rs b/third_party/rust/flate2/v1/crate/src/gz/read.rs deleted file mode 100644 index dbbe63282950..000000000000 --- a/third_party/rust/flate2/v1/crate/src/gz/read.rs +++ /dev/null @@ -1,278 +0,0 @@ -use std::io; -use std::io::prelude::*; - -use super::bufread; -use super::{GzBuilder, GzHeader}; -use crate::bufreader::BufReader; -use crate::Compression; - -/// A gzip streaming encoder -/// -/// This structure exposes a [`Read`] interface that will read uncompressed data -/// from the underlying reader and expose the compressed version as a [`Read`] -/// interface. -/// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html -/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// use std::io; -/// use flate2::Compression; -/// use flate2::read::GzEncoder; -/// -/// // Return a vector containing the GZ compressed version of hello world -/// -/// fn gzencode_hello_world() -> io::Result> { -/// let mut ret_vec = [0;100]; -/// let bytestring = b"hello world"; -/// let mut gz = GzEncoder::new(&bytestring[..], Compression::fast()); -/// let count = gz.read(&mut ret_vec)?; -/// Ok(ret_vec[0..count].to_vec()) -/// } -/// ``` -#[derive(Debug)] -pub struct GzEncoder { - inner: bufread::GzEncoder>, -} - -pub fn gz_encoder(inner: bufread::GzEncoder>) -> GzEncoder { - GzEncoder { inner } -} - -impl GzEncoder { - /// Creates a new encoder which will use the given compression level. - /// - /// The encoder is not configured specially for the emitted header. For - /// header configuration, see the `GzBuilder` type. - /// - /// The data read from the stream `r` will be compressed and available - /// through the returned reader. - pub fn new(r: R, level: Compression) -> GzEncoder { - GzBuilder::new().read(r, level) - } -} - -impl GzEncoder { - /// Acquires a reference to the underlying reader. 
- pub fn get_ref(&self) -> &R { - self.inner.get_ref().get_ref() - } - - /// Acquires a mutable reference to the underlying reader. - /// - /// Note that mutation of the reader may result in surprising results if - /// this encoder is continued to be used. - pub fn get_mut(&mut self) -> &mut R { - self.inner.get_mut().get_mut() - } - - /// Returns the underlying stream, consuming this encoder - pub fn into_inner(self) -> R { - self.inner.into_inner().into_inner() - } -} - -impl Read for GzEncoder { - fn read(&mut self, into: &mut [u8]) -> io::Result { - self.inner.read(into) - } -} - -impl Write for GzEncoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.get_mut().write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.get_mut().flush() - } -} - -/// A gzip streaming decoder -/// -/// This structure exposes a [`Read`] interface that will consume compressed -/// data from the underlying reader and emit uncompressed data. -/// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html -/// -/// # Examples -/// -/// ``` -/// -/// use std::io::prelude::*; -/// use std::io; -/// # use flate2::Compression; -/// # use flate2::write::GzEncoder; -/// use flate2::read::GzDecoder; -/// -/// # fn main() { -/// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); -/// # e.write_all(b"Hello World").unwrap(); -/// # let bytes = e.finish().unwrap(); -/// # println!("{}", decode_reader(bytes).unwrap()); -/// # } -/// # -/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error -/// // Here &[u8] implements Read -/// -/// fn decode_reader(bytes: Vec) -> io::Result { -/// let mut gz = GzDecoder::new(&bytes[..]); -/// let mut s = String::new(); -/// gz.read_to_string(&mut s)?; -/// Ok(s) -/// } -/// ``` -#[derive(Debug)] -pub struct GzDecoder { - inner: bufread::GzDecoder>, -} - -impl GzDecoder { - /// Creates a new decoder from the given reader, immediately parsing the - /// gzip header. 
- pub fn new(r: R) -> GzDecoder { - GzDecoder { - inner: bufread::GzDecoder::new(BufReader::new(r)), - } - } -} - -impl GzDecoder { - /// Returns the header associated with this stream, if it was valid. - pub fn header(&self) -> Option<&GzHeader> { - self.inner.header() - } - - /// Acquires a reference to the underlying reader. - pub fn get_ref(&self) -> &R { - self.inner.get_ref().get_ref() - } - - /// Acquires a mutable reference to the underlying stream. - /// - /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. - pub fn get_mut(&mut self) -> &mut R { - self.inner.get_mut().get_mut() - } - - /// Consumes this decoder, returning the underlying reader. - pub fn into_inner(self) -> R { - self.inner.into_inner().into_inner() - } -} - -impl Read for GzDecoder { - fn read(&mut self, into: &mut [u8]) -> io::Result { - self.inner.read(into) - } -} - -impl Write for GzDecoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.get_mut().write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.get_mut().flush() - } -} - -/// A gzip streaming decoder that decodes all members of a multistream -/// -/// A gzip member consists of a header, compressed data and a trailer. The [gzip -/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple -/// gzip members to be joined in a single stream. `MultiGzDecoder` will -/// decode all consecutive members while `GzDecoder` will only decompress the -/// first gzip member. The multistream format is commonly used in bioinformatics, -/// for example when using the BGZF compressed data. -/// -/// This structure exposes a [`Read`] interface that will consume all gzip members -/// from the underlying reader and emit uncompressed data. 
-/// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html -/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// use std::io; -/// # use flate2::Compression; -/// # use flate2::write::GzEncoder; -/// use flate2::read::MultiGzDecoder; -/// -/// # fn main() { -/// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); -/// # e.write_all(b"Hello World").unwrap(); -/// # let bytes = e.finish().unwrap(); -/// # println!("{}", decode_reader(bytes).unwrap()); -/// # } -/// # -/// // Uncompresses a Gz Encoded vector of bytes and returns a string or error -/// // Here &[u8] implements Read -/// -/// fn decode_reader(bytes: Vec) -> io::Result { -/// let mut gz = MultiGzDecoder::new(&bytes[..]); -/// let mut s = String::new(); -/// gz.read_to_string(&mut s)?; -/// Ok(s) -/// } -/// ``` -#[derive(Debug)] -pub struct MultiGzDecoder { - inner: bufread::MultiGzDecoder>, -} - -impl MultiGzDecoder { - /// Creates a new decoder from the given reader, immediately parsing the - /// (first) gzip header. If the gzip stream contains multiple members all will - /// be decoded. - pub fn new(r: R) -> MultiGzDecoder { - MultiGzDecoder { - inner: bufread::MultiGzDecoder::new(BufReader::new(r)), - } - } -} - -impl MultiGzDecoder { - /// Returns the current header associated with this stream, if it's valid. - pub fn header(&self) -> Option<&GzHeader> { - self.inner.header() - } - - /// Acquires a reference to the underlying reader. - pub fn get_ref(&self) -> &R { - self.inner.get_ref().get_ref() - } - - /// Acquires a mutable reference to the underlying stream. - /// - /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. - pub fn get_mut(&mut self) -> &mut R { - self.inner.get_mut().get_mut() - } - - /// Consumes this decoder, returning the underlying reader. 
- pub fn into_inner(self) -> R { - self.inner.into_inner().into_inner() - } -} - -impl Read for MultiGzDecoder { - fn read(&mut self, into: &mut [u8]) -> io::Result { - self.inner.read(into) - } -} - -impl Write for MultiGzDecoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.get_mut().write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.get_mut().flush() - } -} diff --git a/third_party/rust/flate2/v1/crate/src/gz/write.rs b/third_party/rust/flate2/v1/crate/src/gz/write.rs deleted file mode 100644 index 7cf1a7cd4174..000000000000 --- a/third_party/rust/flate2/v1/crate/src/gz/write.rs +++ /dev/null @@ -1,450 +0,0 @@ -use std::cmp; -use std::io; -use std::io::prelude::*; - -use super::bufread::{corrupt, read_gz_header}; -use super::{GzBuilder, GzHeader}; -use crate::crc::{Crc, CrcWriter}; -use crate::zio; -use crate::{Compress, Compression, Decompress, Status}; - -/// A gzip streaming encoder -/// -/// This structure exposes a [`Write`] interface that will emit compressed data -/// to the underlying writer `W`. -/// -/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html -/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// use flate2::Compression; -/// use flate2::write::GzEncoder; -/// -/// // Vec implements Write to print the compressed bytes of sample string -/// # fn main() { -/// -/// let mut e = GzEncoder::new(Vec::new(), Compression::default()); -/// e.write_all(b"Hello World").unwrap(); -/// println!("{:?}", e.finish().unwrap()); -/// # } -/// ``` -#[derive(Debug)] -pub struct GzEncoder { - inner: zio::Writer, - crc: Crc, - crc_bytes_written: usize, - header: Vec, -} - -pub fn gz_encoder(header: Vec, w: W, lvl: Compression) -> GzEncoder { - GzEncoder { - inner: zio::Writer::new(w, Compress::new(lvl, false)), - crc: Crc::new(), - header, - crc_bytes_written: 0, - } -} - -impl GzEncoder { - /// Creates a new encoder which will use the given compression level. 
- /// - /// The encoder is not configured specially for the emitted header. For - /// header configuration, see the `GzBuilder` type. - /// - /// The data written to the returned encoder will be compressed and then - /// written to the stream `w`. - pub fn new(w: W, level: Compression) -> GzEncoder { - GzBuilder::new().write(w, level) - } - - /// Acquires a reference to the underlying writer. - pub fn get_ref(&self) -> &W { - self.inner.get_ref() - } - - /// Acquires a mutable reference to the underlying writer. - /// - /// Note that mutation of the writer may result in surprising results if - /// this encoder is continued to be used. - pub fn get_mut(&mut self) -> &mut W { - self.inner.get_mut() - } - - /// Attempt to finish this output stream, writing out final chunks of data. - /// - /// Note that this function can only be used once data has finished being - /// written to the output stream. After this function is called then further - /// calls to `write` may result in a panic. - /// - /// # Panics - /// - /// Attempts to write data to this stream may result in a panic after this - /// function is called. - /// - /// # Errors - /// - /// This function will perform I/O to complete this stream, and any I/O - /// errors which occur will be returned from this function. - pub fn try_finish(&mut self) -> io::Result<()> { - self.write_header()?; - self.inner.finish()?; - - while self.crc_bytes_written < 8 { - let (sum, amt) = (self.crc.sum() as u32, self.crc.amount()); - let buf = [ - (sum >> 0) as u8, - (sum >> 8) as u8, - (sum >> 16) as u8, - (sum >> 24) as u8, - (amt >> 0) as u8, - (amt >> 8) as u8, - (amt >> 16) as u8, - (amt >> 24) as u8, - ]; - let inner = self.inner.get_mut(); - let n = inner.write(&buf[self.crc_bytes_written..])?; - self.crc_bytes_written += n; - } - Ok(()) - } - - /// Finish encoding this stream, returning the underlying writer once the - /// encoding is done. 
- /// - /// Note that this function may not be suitable to call in a situation where - /// the underlying stream is an asynchronous I/O stream. To finish a stream - /// the `try_finish` (or `shutdown`) method should be used instead. To - /// re-acquire ownership of a stream it is safe to call this method after - /// `try_finish` or `shutdown` has returned `Ok`. - /// - /// # Errors - /// - /// This function will perform I/O to complete this stream, and any I/O - /// errors which occur will be returned from this function. - pub fn finish(mut self) -> io::Result { - self.try_finish()?; - Ok(self.inner.take_inner()) - } - - fn write_header(&mut self) -> io::Result<()> { - while !self.header.is_empty() { - let n = self.inner.get_mut().write(&self.header)?; - self.header.drain(..n); - } - Ok(()) - } -} - -impl Write for GzEncoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - assert_eq!(self.crc_bytes_written, 0); - self.write_header()?; - let n = self.inner.write(buf)?; - self.crc.update(&buf[..n]); - Ok(n) - } - - fn flush(&mut self) -> io::Result<()> { - assert_eq!(self.crc_bytes_written, 0); - self.write_header()?; - self.inner.flush() - } -} - -impl Read for GzEncoder { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.get_mut().read(buf) - } -} - -impl Drop for GzEncoder { - fn drop(&mut self) { - if self.inner.is_present() { - let _ = self.try_finish(); - } - } -} - -/// A gzip streaming decoder -/// -/// This structure exposes a [`Write`] interface that will emit compressed data -/// to the underlying writer `W`. 
-/// -/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html -/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// use std::io; -/// use flate2::Compression; -/// use flate2::write::{GzEncoder, GzDecoder}; -/// -/// # fn main() { -/// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); -/// # e.write(b"Hello World").unwrap(); -/// # let bytes = e.finish().unwrap(); -/// # assert_eq!("Hello World", decode_writer(bytes).unwrap()); -/// # } -/// // Uncompresses a gzip encoded vector of bytes and returns a string or error -/// // Here Vec implements Write -/// fn decode_writer(bytes: Vec) -> io::Result { -/// let mut writer = Vec::new(); -/// let mut decoder = GzDecoder::new(writer); -/// decoder.write_all(&bytes[..])?; -/// writer = decoder.finish()?; -/// let return_string = String::from_utf8(writer).expect("String parsing error"); -/// Ok(return_string) -/// } -/// ``` -#[derive(Debug)] -pub struct GzDecoder { - inner: zio::Writer, Decompress>, - crc_bytes: Vec, - header: Option, - header_buf: Vec, -} - -const CRC_BYTES_LEN: usize = 8; - -impl GzDecoder { - /// Creates a new decoder which will write uncompressed data to the stream. - /// - /// When this encoder is dropped or unwrapped the final pieces of data will - /// be flushed. - pub fn new(w: W) -> GzDecoder { - GzDecoder { - inner: zio::Writer::new(CrcWriter::new(w), Decompress::new(false)), - crc_bytes: Vec::with_capacity(CRC_BYTES_LEN), - header: None, - header_buf: Vec::new(), - } - } - - /// Returns the header associated with this stream. - pub fn header(&self) -> Option<&GzHeader> { - self.header.as_ref() - } - - /// Acquires a reference to the underlying writer. - pub fn get_ref(&self) -> &W { - self.inner.get_ref().get_ref() - } - - /// Acquires a mutable reference to the underlying writer. - /// - /// Note that mutating the output/input state of the stream may corrupt this - /// object, so care must be taken when using this method. 
- pub fn get_mut(&mut self) -> &mut W { - self.inner.get_mut().get_mut() - } - - /// Attempt to finish this output stream, writing out final chunks of data. - /// - /// Note that this function can only be used once data has finished being - /// written to the output stream. After this function is called then further - /// calls to `write` may result in a panic. - /// - /// # Panics - /// - /// Attempts to write data to this stream may result in a panic after this - /// function is called. - /// - /// # Errors - /// - /// This function will perform I/O to finish the stream, returning any - /// errors which happen. - pub fn try_finish(&mut self) -> io::Result<()> { - self.finish_and_check_crc()?; - Ok(()) - } - - /// Consumes this decoder, flushing the output stream. - /// - /// This will flush the underlying data stream and then return the contained - /// writer if the flush succeeded. - /// - /// Note that this function may not be suitable to call in a situation where - /// the underlying stream is an asynchronous I/O stream. To finish a stream - /// the `try_finish` (or `shutdown`) method should be used instead. To - /// re-acquire ownership of a stream it is safe to call this method after - /// `try_finish` or `shutdown` has returned `Ok`. - /// - /// # Errors - /// - /// This function will perform I/O to complete this stream, and any I/O - /// errors which occur will be returned from this function. 
- pub fn finish(mut self) -> io::Result { - self.finish_and_check_crc()?; - Ok(self.inner.take_inner().into_inner()) - } - - fn finish_and_check_crc(&mut self) -> io::Result<()> { - self.inner.finish()?; - - if self.crc_bytes.len() != 8 { - return Err(corrupt()); - } - - let crc = ((self.crc_bytes[0] as u32) << 0) - | ((self.crc_bytes[1] as u32) << 8) - | ((self.crc_bytes[2] as u32) << 16) - | ((self.crc_bytes[3] as u32) << 24); - let amt = ((self.crc_bytes[4] as u32) << 0) - | ((self.crc_bytes[5] as u32) << 8) - | ((self.crc_bytes[6] as u32) << 16) - | ((self.crc_bytes[7] as u32) << 24); - if crc != self.inner.get_ref().crc().sum() as u32 { - return Err(corrupt()); - } - if amt != self.inner.get_ref().crc().amount() { - return Err(corrupt()); - } - Ok(()) - } -} - -struct Counter { - inner: T, - pos: usize, -} - -impl Read for Counter { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - let pos = self.inner.read(buf)?; - self.pos += pos; - Ok(pos) - } -} - -impl Write for GzDecoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - if self.header.is_none() { - // trying to avoid buffer usage - let (res, pos) = { - let mut counter = Counter { - inner: self.header_buf.chain(buf), - pos: 0, - }; - let res = read_gz_header(&mut counter); - (res, counter.pos) - }; - - match res { - Err(err) => { - if err.kind() == io::ErrorKind::UnexpectedEof { - // not enough data for header, save to the buffer - self.header_buf.extend(buf); - Ok(buf.len()) - } else { - Err(err) - } - } - Ok(header) => { - self.header = Some(header); - let pos = pos - self.header_buf.len(); - self.header_buf.truncate(0); - Ok(pos) - } - } - } else { - let (n, status) = self.inner.write_with_status(buf)?; - - if status == Status::StreamEnd && n < buf.len() && self.crc_bytes.len() < 8 { - let remaining = buf.len() - n; - let crc_bytes = cmp::min(remaining, CRC_BYTES_LEN - self.crc_bytes.len()); - self.crc_bytes.extend(&buf[n..n + crc_bytes]); - return Ok(n + crc_bytes); - } - Ok(n) - } - } - - fn 
flush(&mut self) -> io::Result<()> { - self.inner.flush() - } -} - -impl Read for GzDecoder { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.inner.get_mut().get_mut().read(buf) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - const STR: &'static str = "Hello World Hello World Hello World Hello World Hello World \ - Hello World Hello World Hello World Hello World Hello World \ - Hello World Hello World Hello World Hello World Hello World \ - Hello World Hello World Hello World Hello World Hello World \ - Hello World Hello World Hello World Hello World Hello World"; - - #[test] - fn decode_writer_one_chunk() { - let mut e = GzEncoder::new(Vec::new(), Compression::default()); - e.write(STR.as_ref()).unwrap(); - let bytes = e.finish().unwrap(); - - let mut writer = Vec::new(); - let mut decoder = GzDecoder::new(writer); - let n = decoder.write(&bytes[..]).unwrap(); - decoder.write(&bytes[n..]).unwrap(); - decoder.try_finish().unwrap(); - writer = decoder.finish().unwrap(); - let return_string = String::from_utf8(writer).expect("String parsing error"); - assert_eq!(return_string, STR); - } - - #[test] - fn decode_writer_partial_header() { - let mut e = GzEncoder::new(Vec::new(), Compression::default()); - e.write(STR.as_ref()).unwrap(); - let bytes = e.finish().unwrap(); - - let mut writer = Vec::new(); - let mut decoder = GzDecoder::new(writer); - assert_eq!(decoder.write(&bytes[..5]).unwrap(), 5); - let n = decoder.write(&bytes[5..]).unwrap(); - if n < bytes.len() - 5 { - decoder.write(&bytes[n + 5..]).unwrap(); - } - writer = decoder.finish().unwrap(); - let return_string = String::from_utf8(writer).expect("String parsing error"); - assert_eq!(return_string, STR); - } - - #[test] - fn decode_writer_exact_header() { - let mut e = GzEncoder::new(Vec::new(), Compression::default()); - e.write(STR.as_ref()).unwrap(); - let bytes = e.finish().unwrap(); - - let mut writer = Vec::new(); - let mut decoder = GzDecoder::new(writer); - 
assert_eq!(decoder.write(&bytes[..10]).unwrap(), 10); - decoder.write(&bytes[10..]).unwrap(); - writer = decoder.finish().unwrap(); - let return_string = String::from_utf8(writer).expect("String parsing error"); - assert_eq!(return_string, STR); - } - - #[test] - fn decode_writer_partial_crc() { - let mut e = GzEncoder::new(Vec::new(), Compression::default()); - e.write(STR.as_ref()).unwrap(); - let bytes = e.finish().unwrap(); - - let mut writer = Vec::new(); - let mut decoder = GzDecoder::new(writer); - let l = bytes.len() - 5; - let n = decoder.write(&bytes[..l]).unwrap(); - decoder.write(&bytes[n..]).unwrap(); - writer = decoder.finish().unwrap(); - let return_string = String::from_utf8(writer).expect("String parsing error"); - assert_eq!(return_string, STR); - } -} diff --git a/third_party/rust/flate2/v1/crate/src/lib.rs b/third_party/rust/flate2/v1/crate/src/lib.rs deleted file mode 100644 index 23a783e55932..000000000000 --- a/third_party/rust/flate2/v1/crate/src/lib.rs +++ /dev/null @@ -1,204 +0,0 @@ -//! A DEFLATE-based stream compression/decompression library -//! -//! This library provides support for compression and decompression of -//! DEFLATE-based streams: -//! -//! * the DEFLATE format itself -//! * the zlib format -//! * gzip -//! -//! These three formats are all closely related and largely only differ in their -//! headers/footers. This crate has three types in each submodule for dealing -//! with these three formats. -//! -//! # Implementation -//! -//! In addition to supporting three formats, this crate supports several different -//! backends, controlled through this crate's features: -//! -//! * `default`, or `rust_backend` - this implementation uses the `miniz_oxide` -//! crate which is a port of `miniz.c` (below) to Rust. This feature does not -//! require a C compiler and only requires Rust code. -//! -//! * `zlib` - this feature will enable linking against the `libz` library, typically found on most -//! Linux systems by default. 
If the library isn't found to already be on the system it will be -//! compiled from source (this is a C library). -//! -//! There's various tradeoffs associated with each implementation, but in general you probably -//! won't have to tweak the defaults. The default choice is selected to avoid the need for a C -//! compiler at build time. `zlib-ng-compat` is useful if you're using zlib for compatibility but -//! want performance via zlib-ng's zlib-compat mode. `zlib` is useful if something else in your -//! dependencies links the original zlib so you cannot use zlib-ng-compat. The compression ratios -//! and performance of each of these feature should be roughly comparable, but you'll likely want -//! to run your own tests if you're curious about the performance. -//! -//! # Organization -//! -//! This crate consists mainly of three modules, [`read`], [`write`], and -//! [`bufread`]. Each module contains a number of types used to encode and -//! decode various streams of data. -//! -//! All types in the [`write`] module work on instances of [`Write`][write], -//! whereas all types in the [`read`] module work on instances of -//! [`Read`][read] and [`bufread`] works with [`BufRead`][bufread]. If you -//! are decoding directly from a `&[u8]`, use the [`bufread`] types. -//! -//! ``` -//! use flate2::write::GzEncoder; -//! use flate2::Compression; -//! use std::io; -//! use std::io::prelude::*; -//! -//! # fn main() { let _ = run(); } -//! # fn run() -> io::Result<()> { -//! let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); -//! encoder.write_all(b"Example")?; -//! # Ok(()) -//! # } -//! ``` -//! -//! -//! Other various types are provided at the top-level of the crate for -//! management and dealing with encoders/decoders. Also note that types which -//! operate over a specific trait often implement the mirroring trait as well. -//! For example a `flate2::read::DeflateDecoder` *also* implements the -//! `Write` trait if `T: Write`. 
That is, the "dual trait" is forwarded directly -//! to the underlying object if available. -//! -//! [`read`]: read/index.html -//! [`bufread`]: bufread/index.html -//! [`write`]: write/index.html -//! [read]: https://doc.rust-lang.org/std/io/trait.Read.html -//! [write]: https://doc.rust-lang.org/std/io/trait.Write.html -//! [bufread]: https://doc.rust-lang.org/std/io/trait.BufRead.html -#![doc(html_root_url = "https://docs.rs/flate2/0.2")] -#![deny(missing_docs)] -#![deny(missing_debug_implementations)] -#![allow(trivial_numeric_casts)] -#![cfg_attr(test, deny(warnings))] - -pub use crate::crc::{Crc, CrcReader, CrcWriter}; -pub use crate::gz::GzBuilder; -pub use crate::gz::GzHeader; -pub use crate::mem::{Compress, CompressError, Decompress, DecompressError, Status}; -pub use crate::mem::{FlushCompress, FlushDecompress}; - -mod bufreader; -mod crc; -mod deflate; -mod ffi; -mod gz; -mod mem; -mod zio; -mod zlib; - -/// Types which operate over [`Read`] streams, both encoders and decoders for -/// various formats. -/// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html -pub mod read { - pub use crate::deflate::read::DeflateDecoder; - pub use crate::deflate::read::DeflateEncoder; - pub use crate::gz::read::GzDecoder; - pub use crate::gz::read::GzEncoder; - pub use crate::gz::read::MultiGzDecoder; - pub use crate::zlib::read::ZlibDecoder; - pub use crate::zlib::read::ZlibEncoder; -} - -/// Types which operate over [`Write`] streams, both encoders and decoders for -/// various formats. -/// -/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html -pub mod write { - pub use crate::deflate::write::DeflateDecoder; - pub use crate::deflate::write::DeflateEncoder; - pub use crate::gz::write::GzDecoder; - pub use crate::gz::write::GzEncoder; - pub use crate::zlib::write::ZlibDecoder; - pub use crate::zlib::write::ZlibEncoder; -} - -/// Types which operate over [`BufRead`] streams, both encoders and decoders for -/// various formats. 
-/// -/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html -pub mod bufread { - pub use crate::deflate::bufread::DeflateDecoder; - pub use crate::deflate::bufread::DeflateEncoder; - pub use crate::gz::bufread::GzDecoder; - pub use crate::gz::bufread::GzEncoder; - pub use crate::gz::bufread::MultiGzDecoder; - pub use crate::zlib::bufread::ZlibDecoder; - pub use crate::zlib::bufread::ZlibEncoder; -} - -fn _assert_send_sync() { - fn _assert_send_sync() {} - - _assert_send_sync::>(); - _assert_send_sync::>(); - _assert_send_sync::>(); - _assert_send_sync::>(); - _assert_send_sync::>(); - _assert_send_sync::>(); - _assert_send_sync::>(); - _assert_send_sync::>>(); - _assert_send_sync::>>(); - _assert_send_sync::>>(); - _assert_send_sync::>>(); - _assert_send_sync::>>(); - _assert_send_sync::>>(); -} - -/// When compressing data, the compression level can be specified by a value in -/// this enum. -#[derive(Copy, Clone, PartialEq, Eq, Debug)] -pub struct Compression(u32); - -impl Compression { - /// Creates a new description of the compression level with an explicitly - /// specified integer. - /// - /// The integer here is typically on a scale of 0-9 where 0 means "no - /// compression" and 9 means "take as long as you'd like". - pub const fn new(level: u32) -> Compression { - Compression(level) - } - - /// No compression is to be performed, this may actually inflate data - /// slightly when encoding. - pub const fn none() -> Compression { - Compression(0) - } - - /// Optimize for the best speed of encoding. - pub const fn fast() -> Compression { - Compression(1) - } - - /// Optimize for the size of data being encoded. 
- pub const fn best() -> Compression { - Compression(9) - } - - /// Returns an integer representing the compression level, typically on a - /// scale of 0-9 - pub fn level(&self) -> u32 { - self.0 - } -} - -impl Default for Compression { - fn default() -> Compression { - Compression(6) - } -} - -#[cfg(test)] -fn random_bytes() -> impl Iterator { - use rand::Rng; - use std::iter; - - iter::repeat(()).map(|_| rand::thread_rng().gen()) -} diff --git a/third_party/rust/flate2/v1/crate/src/mem.rs b/third_party/rust/flate2/v1/crate/src/mem.rs deleted file mode 100644 index 5f1d6d8c1893..000000000000 --- a/third_party/rust/flate2/v1/crate/src/mem.rs +++ /dev/null @@ -1,788 +0,0 @@ -use std::error::Error; -use std::fmt; -use std::io; -use std::slice; - -use crate::ffi::{self, Backend, Deflate, DeflateBackend, ErrorMessage, Inflate, InflateBackend}; -use crate::Compression; - -/// Raw in-memory compression stream for blocks of data. -/// -/// This type is the building block for the I/O streams in the rest of this -/// crate. It requires more management than the [`Read`]/[`Write`] API but is -/// maximally flexible in terms of accepting input from any source and being -/// able to produce output to any memory location. -/// -/// It is recommended to use the I/O stream adaptors over this type as they're -/// easier to use. -/// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html -/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html -#[derive(Debug)] -pub struct Compress { - inner: Deflate, -} - -/// Raw in-memory decompression stream for blocks of data. -/// -/// This type is the building block for the I/O streams in the rest of this -/// crate. It requires more management than the [`Read`]/[`Write`] API but is -/// maximally flexible in terms of accepting input from any source and being -/// able to produce output to any memory location. -/// -/// It is recommended to use the I/O stream adaptors over this type as they're -/// easier to use. 
-/// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html -/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html -#[derive(Debug)] -pub struct Decompress { - inner: Inflate, -} - -#[derive(Copy, Clone, PartialEq, Eq, Debug)] -/// Values which indicate the form of flushing to be used when compressing -/// in-memory data. -pub enum FlushCompress { - /// A typical parameter for passing to compression/decompression functions, - /// this indicates that the underlying stream to decide how much data to - /// accumulate before producing output in order to maximize compression. - None = ffi::MZ_NO_FLUSH as isize, - - /// All pending output is flushed to the output buffer and the output is - /// aligned on a byte boundary so that the decompressor can get all input - /// data available so far. - /// - /// Flushing may degrade compression for some compression algorithms and so - /// it should only be used when necessary. This will complete the current - /// deflate block and follow it with an empty stored block. - Sync = ffi::MZ_SYNC_FLUSH as isize, - - /// All pending output is flushed to the output buffer, but the output is - /// not aligned to a byte boundary. - /// - /// All of the input data so far will be available to the decompressor (as - /// with `Flush::Sync`. This completes the current deflate block and follows - /// it with an empty fixed codes block that is 10 bites long, and it assures - /// that enough bytes are output in order for the decompressor to finish the - /// block before the empty fixed code block. - Partial = ffi::MZ_PARTIAL_FLUSH as isize, - - /// All output is flushed as with `Flush::Sync` and the compression state is - /// reset so decompression can restart from this point if previous - /// compressed data has been damaged or if random access is desired. - /// - /// Using this option too often can seriously degrade compression. 
- Full = ffi::MZ_FULL_FLUSH as isize, - - /// Pending input is processed and pending output is flushed. - /// - /// The return value may indicate that the stream is not yet done and more - /// data has yet to be processed. - Finish = ffi::MZ_FINISH as isize, - - #[doc(hidden)] - _Nonexhaustive, -} - -#[derive(Copy, Clone, PartialEq, Eq, Debug)] -/// Values which indicate the form of flushing to be used when -/// decompressing in-memory data. -pub enum FlushDecompress { - /// A typical parameter for passing to compression/decompression functions, - /// this indicates that the underlying stream to decide how much data to - /// accumulate before producing output in order to maximize compression. - None = ffi::MZ_NO_FLUSH as isize, - - /// All pending output is flushed to the output buffer and the output is - /// aligned on a byte boundary so that the decompressor can get all input - /// data available so far. - /// - /// Flushing may degrade compression for some compression algorithms and so - /// it should only be used when necessary. This will complete the current - /// deflate block and follow it with an empty stored block. - Sync = ffi::MZ_SYNC_FLUSH as isize, - - /// Pending input is processed and pending output is flushed. - /// - /// The return value may indicate that the stream is not yet done and more - /// data has yet to be processed. - Finish = ffi::MZ_FINISH as isize, - - #[doc(hidden)] - _Nonexhaustive, -} - -/// The inner state for an error when decompressing -#[derive(Debug)] -pub(crate) enum DecompressErrorInner { - General { msg: ErrorMessage }, - NeedsDictionary(u32), -} - -/// Error returned when a decompression object finds that the input stream of -/// bytes was not a valid input stream of bytes. -#[derive(Debug)] -pub struct DecompressError(pub(crate) DecompressErrorInner); - -impl DecompressError { - /// Indicates whether decompression failed due to requiring a dictionary. 
- /// - /// The resulting integer is the Adler-32 checksum of the dictionary - /// required. - pub fn needs_dictionary(&self) -> Option { - match self.0 { - DecompressErrorInner::NeedsDictionary(adler) => Some(adler), - _ => None, - } - } -} - -#[inline] -pub(crate) fn decompress_failed(msg: ErrorMessage) -> Result { - Err(DecompressError(DecompressErrorInner::General { msg })) -} - -#[inline] -pub(crate) fn decompress_need_dict(adler: u32) -> Result { - Err(DecompressError(DecompressErrorInner::NeedsDictionary( - adler, - ))) -} - -/// Error returned when a compression object is used incorrectly or otherwise -/// generates an error. -#[derive(Debug)] -pub struct CompressError { - pub(crate) msg: ErrorMessage, -} - -#[inline] -pub(crate) fn compress_failed(msg: ErrorMessage) -> Result { - Err(CompressError { msg }) -} - -/// Possible status results of compressing some data or successfully -/// decompressing a block of data. -#[derive(Copy, Clone, PartialEq, Eq, Debug)] -pub enum Status { - /// Indicates success. - /// - /// Means that more input may be needed but isn't available - /// and/or there's more output to be written but the output buffer is full. - Ok, - - /// Indicates that forward progress is not possible due to input or output - /// buffers being empty. - /// - /// For compression it means the input buffer needs some more data or the - /// output buffer needs to be freed up before trying again. - /// - /// For decompression this means that more input is needed to continue or - /// the output buffer isn't large enough to contain the result. The function - /// can be called again after fixing both. - BufError, - - /// Indicates that all input has been consumed and all output bytes have - /// been written. Decompression/compression should not be called again. - /// - /// For decompression with zlib streams the adler-32 of the decompressed - /// data has also been verified. 
- StreamEnd, -} - -impl Compress { - /// Creates a new object ready for compressing data that it's given. - /// - /// The `level` argument here indicates what level of compression is going - /// to be performed, and the `zlib_header` argument indicates whether the - /// output data should have a zlib header or not. - pub fn new(level: Compression, zlib_header: bool) -> Compress { - Compress { - inner: Deflate::make(level, zlib_header, ffi::MZ_DEFAULT_WINDOW_BITS as u8), - } - } - - /// Creates a new object ready for compressing data that it's given. - /// - /// The `level` argument here indicates what level of compression is going - /// to be performed, and the `zlib_header` argument indicates whether the - /// output data should have a zlib header or not. The `window_bits` parameter - /// indicates the base-2 logarithm of the sliding window size and must be - /// between 9 and 15. - /// - /// # Panics - /// - /// If `window_bits` does not fall into the range 9 ..= 15, - /// `new_with_window_bits` will panic. - /// - /// # Note - /// - /// This constructor is only available when the `zlib` feature is used. - /// Other backends currently do not support custom window bits. - #[cfg(feature = "any_zlib")] - pub fn new_with_window_bits( - level: Compression, - zlib_header: bool, - window_bits: u8, - ) -> Compress { - assert!( - window_bits > 8 && window_bits < 16, - "window_bits must be within 9 ..= 15" - ); - Compress { - inner: Deflate::make(level, zlib_header, window_bits), - } - } - - /// Creates a new object ready for compressing data that it's given. - /// - /// The `level` argument here indicates what level of compression is going - /// to be performed. - /// - /// The Compress object produced by this constructor outputs gzip headers - /// for the compressed data. - /// - /// # Panics - /// - /// If `window_bits` does not fall into the range 9 ..= 15, - /// `new_with_window_bits` will panic. 
- /// - /// # Note - /// - /// This constructor is only available when the `zlib` feature is used. - /// Other backends currently do not support gzip headers for Compress. - #[cfg(feature = "any_zlib")] - pub fn new_gzip(level: Compression, window_bits: u8) -> Compress { - assert!( - window_bits > 8 && window_bits < 16, - "window_bits must be within 9 ..= 15" - ); - Compress { - inner: Deflate::make(level, true, window_bits + 16), - } - } - - /// Returns the total number of input bytes which have been processed by - /// this compression object. - pub fn total_in(&self) -> u64 { - self.inner.total_in() - } - - /// Returns the total number of output bytes which have been produced by - /// this compression object. - pub fn total_out(&self) -> u64 { - self.inner.total_out() - } - - /// Specifies the compression dictionary to use. - /// - /// Returns the Adler-32 checksum of the dictionary. - #[cfg(feature = "any_zlib")] - pub fn set_dictionary(&mut self, dictionary: &[u8]) -> Result { - let stream = &mut *self.inner.inner.stream_wrapper; - stream.msg = std::ptr::null_mut(); - let rc = unsafe { - assert!(dictionary.len() < ffi::uInt::MAX as usize); - ffi::deflateSetDictionary(stream, dictionary.as_ptr(), dictionary.len() as ffi::uInt) - }; - - match rc { - ffi::MZ_STREAM_ERROR => compress_failed(self.inner.inner.msg()), - ffi::MZ_OK => Ok(stream.adler as u32), - c => panic!("unknown return code: {}", c), - } - } - - /// Quickly resets this compressor without having to reallocate anything. - /// - /// This is equivalent to dropping this object and then creating a new one. - pub fn reset(&mut self) { - self.inner.reset(); - } - - /// Dynamically updates the compression level. - /// - /// This can be used to switch between compression levels for different - /// kinds of data, or it can be used in conjunction with a call to reset - /// to reuse the compressor. 
- /// - /// This may return an error if there wasn't enough output space to complete - /// the compression of the available input data before changing the - /// compression level. Flushing the stream before calling this method - /// ensures that the function will succeed on the first call. - #[cfg(feature = "any_zlib")] - pub fn set_level(&mut self, level: Compression) -> Result<(), CompressError> { - use std::os::raw::c_int; - let stream = &mut *self.inner.inner.stream_wrapper; - stream.msg = std::ptr::null_mut(); - - let rc = unsafe { ffi::deflateParams(stream, level.0 as c_int, ffi::MZ_DEFAULT_STRATEGY) }; - - match rc { - ffi::MZ_OK => Ok(()), - ffi::MZ_BUF_ERROR => compress_failed(self.inner.inner.msg()), - c => panic!("unknown return code: {}", c), - } - } - - /// Compresses the input data into the output, consuming only as much - /// input as needed and writing as much output as possible. - /// - /// The flush option can be any of the available `FlushCompress` parameters. - /// - /// To learn how much data was consumed or how much output was produced, use - /// the `total_in` and `total_out` functions before/after this is called. - pub fn compress( - &mut self, - input: &[u8], - output: &mut [u8], - flush: FlushCompress, - ) -> Result { - self.inner.compress(input, output, flush) - } - - /// Compresses the input data into the extra space of the output, consuming - /// only as much input as needed and writing as much output as possible. - /// - /// This function has the same semantics as `compress`, except that the - /// length of `vec` is managed by this function. This will not reallocate - /// the vector provided or attempt to grow it, so space for the output must - /// be reserved in the output vector by the caller before calling this - /// function. 
- pub fn compress_vec( - &mut self, - input: &[u8], - output: &mut Vec, - flush: FlushCompress, - ) -> Result { - let cap = output.capacity(); - let len = output.len(); - - unsafe { - let before = self.total_out(); - let ret = { - let ptr = output.as_mut_ptr().offset(len as isize); - let out = slice::from_raw_parts_mut(ptr, cap - len); - self.compress(input, out, flush) - }; - output.set_len((self.total_out() - before) as usize + len); - ret - } - } -} - -impl Decompress { - /// Creates a new object ready for decompressing data that it's given. - /// - /// The `zlib_header` argument indicates whether the input data is expected - /// to have a zlib header or not. - pub fn new(zlib_header: bool) -> Decompress { - Decompress { - inner: Inflate::make(zlib_header, ffi::MZ_DEFAULT_WINDOW_BITS as u8), - } - } - - /// Creates a new object ready for decompressing data that it's given. - /// - /// The `zlib_header` argument indicates whether the input data is expected - /// to have a zlib header or not. The `window_bits` parameter indicates the - /// base-2 logarithm of the sliding window size and must be between 9 and 15. - /// - /// # Panics - /// - /// If `window_bits` does not fall into the range 9 ..= 15, - /// `new_with_window_bits` will panic. - /// - /// # Note - /// - /// This constructor is only available when the `zlib` feature is used. - /// Other backends currently do not support custom window bits. - #[cfg(feature = "any_zlib")] - pub fn new_with_window_bits(zlib_header: bool, window_bits: u8) -> Decompress { - assert!( - window_bits > 8 && window_bits < 16, - "window_bits must be within 9 ..= 15" - ); - Decompress { - inner: Inflate::make(zlib_header, window_bits), - } - } - - /// Creates a new object ready for decompressing data that it's given. - /// - /// The Decompress object produced by this constructor expects gzip headers - /// for the compressed data. 
- /// - /// # Panics - /// - /// If `window_bits` does not fall into the range 9 ..= 15, - /// `new_with_window_bits` will panic. - /// - /// # Note - /// - /// This constructor is only available when the `zlib` feature is used. - /// Other backends currently do not support gzip headers for Decompress. - #[cfg(feature = "any_zlib")] - pub fn new_gzip(window_bits: u8) -> Decompress { - assert!( - window_bits > 8 && window_bits < 16, - "window_bits must be within 9 ..= 15" - ); - Decompress { - inner: Inflate::make(true, window_bits + 16), - } - } - - /// Returns the total number of input bytes which have been processed by - /// this decompression object. - pub fn total_in(&self) -> u64 { - self.inner.total_in() - } - - /// Returns the total number of output bytes which have been produced by - /// this decompression object. - pub fn total_out(&self) -> u64 { - self.inner.total_out() - } - - /// Decompresses the input data into the output, consuming only as much - /// input as needed and writing as much output as possible. - /// - /// The flush option can be any of the available `FlushDecompress` parameters. - /// - /// If the first call passes `FlushDecompress::Finish` it is assumed that - /// the input and output buffers are both sized large enough to decompress - /// the entire stream in a single call. - /// - /// A flush value of `FlushDecompress::Finish` indicates that there are no - /// more source bytes available beside what's already in the input buffer, - /// and the output buffer is large enough to hold the rest of the - /// decompressed data. - /// - /// To learn how much data was consumed or how much output was produced, use - /// the `total_in` and `total_out` functions before/after this is called. - /// - /// # Errors - /// - /// If the input data to this instance of `Decompress` is not a valid - /// zlib/deflate stream then this function may return an instance of - /// `DecompressError` to indicate that the stream of input bytes is corrupted. 
- pub fn decompress( - &mut self, - input: &[u8], - output: &mut [u8], - flush: FlushDecompress, - ) -> Result { - self.inner.decompress(input, output, flush) - } - - /// Decompresses the input data into the extra space in the output vector - /// specified by `output`. - /// - /// This function has the same semantics as `decompress`, except that the - /// length of `vec` is managed by this function. This will not reallocate - /// the vector provided or attempt to grow it, so space for the output must - /// be reserved in the output vector by the caller before calling this - /// function. - /// - /// # Errors - /// - /// If the input data to this instance of `Decompress` is not a valid - /// zlib/deflate stream then this function may return an instance of - /// `DecompressError` to indicate that the stream of input bytes is corrupted. - pub fn decompress_vec( - &mut self, - input: &[u8], - output: &mut Vec, - flush: FlushDecompress, - ) -> Result { - let cap = output.capacity(); - let len = output.len(); - - unsafe { - let before = self.total_out(); - let ret = { - let ptr = output.as_mut_ptr().offset(len as isize); - let out = slice::from_raw_parts_mut(ptr, cap - len); - self.decompress(input, out, flush) - }; - output.set_len((self.total_out() - before) as usize + len); - ret - } - } - - /// Specifies the decompression dictionary to use. 
- #[cfg(feature = "any_zlib")] - pub fn set_dictionary(&mut self, dictionary: &[u8]) -> Result { - let stream = &mut *self.inner.inner.stream_wrapper; - stream.msg = std::ptr::null_mut(); - let rc = unsafe { - assert!(dictionary.len() < ffi::uInt::MAX as usize); - ffi::inflateSetDictionary(stream, dictionary.as_ptr(), dictionary.len() as ffi::uInt) - }; - - match rc { - ffi::MZ_STREAM_ERROR => decompress_failed(self.inner.inner.msg()), - ffi::MZ_DATA_ERROR => decompress_need_dict(stream.adler as u32), - ffi::MZ_OK => Ok(stream.adler as u32), - c => panic!("unknown return code: {}", c), - } - } - - /// Performs the equivalent of replacing this decompression state with a - /// freshly allocated copy. - /// - /// This function may not allocate memory, though, and attempts to reuse any - /// previously existing resources. - /// - /// The argument provided here indicates whether the reset state will - /// attempt to decode a zlib header first or not. - pub fn reset(&mut self, zlib_header: bool) { - self.inner.reset(zlib_header); - } -} - -impl Error for DecompressError {} - -impl DecompressError { - /// Retrieve the implementation's message about why the operation failed, if one exists. - pub fn message(&self) -> Option<&str> { - match &self.0 { - DecompressErrorInner::General { msg } => msg.get(), - _ => None, - } - } -} - -impl From for io::Error { - fn from(data: DecompressError) -> io::Error { - io::Error::new(io::ErrorKind::Other, data) - } -} - -impl fmt::Display for DecompressError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let msg = match &self.0 { - DecompressErrorInner::General { msg } => msg.get(), - DecompressErrorInner::NeedsDictionary { .. 
} => Some("requires a dictionary"), - }; - match msg { - Some(msg) => write!(f, "deflate decompression error: {}", msg), - None => write!(f, "deflate decompression error"), - } - } -} - -impl Error for CompressError {} - -impl CompressError { - /// Retrieve the implementation's message about why the operation failed, if one exists. - pub fn message(&self) -> Option<&str> { - self.msg.get() - } -} - -impl From for io::Error { - fn from(data: CompressError) -> io::Error { - io::Error::new(io::ErrorKind::Other, data) - } -} - -impl fmt::Display for CompressError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self.msg.get() { - Some(msg) => write!(f, "deflate compression error: {}", msg), - None => write!(f, "deflate compression error"), - } - } -} - -#[cfg(test)] -mod tests { - use std::io::Write; - - use crate::write; - use crate::{Compression, Decompress, FlushDecompress}; - - #[cfg(feature = "any_zlib")] - use crate::{Compress, FlushCompress}; - - #[test] - fn issue51() { - let data = vec![ - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0xb3, 0xc9, 0x28, 0xc9, - 0xcd, 0xb1, 0xe3, 0xe5, 0xb2, 0xc9, 0x48, 0x4d, 0x4c, 0xb1, 0xb3, 0x29, 0xc9, 0x2c, - 0xc9, 0x49, 0xb5, 0x33, 0x31, 0x30, 0x51, 0xf0, 0xcb, 0x2f, 0x51, 0x70, 0xcb, 0x2f, - 0xcd, 0x4b, 0xb1, 0xd1, 0x87, 0x08, 0xda, 0xe8, 0x83, 0x95, 0x00, 0x95, 0x26, 0xe5, - 0xa7, 0x54, 0x2a, 0x24, 0xa5, 0x27, 0xe7, 0xe7, 0xe4, 0x17, 0xd9, 0x2a, 0x95, 0x67, - 0x64, 0x96, 0xa4, 0x2a, 0x81, 0x8c, 0x48, 0x4e, 0xcd, 0x2b, 0x49, 0x2d, 0xb2, 0xb3, - 0xc9, 0x30, 0x44, 0x37, 0x01, 0x28, 0x62, 0xa3, 0x0f, 0x95, 0x06, 0xd9, 0x05, 0x54, - 0x04, 0xe5, 0xe5, 0xa5, 0x67, 0xe6, 0x55, 0xe8, 0x1b, 0xea, 0x99, 0xe9, 0x19, 0x21, - 0xab, 0xd0, 0x07, 0xd9, 0x01, 0x32, 0x53, 0x1f, 0xea, 0x3e, 0x00, 0x94, 0x85, 0xeb, - 0xe4, 0xa8, 0x00, 0x00, 0x00, - ]; - - let mut decoded = Vec::with_capacity(data.len() * 2); - - let mut d = Decompress::new(false); - // decompressed whole deflate stream - assert!(d - 
.decompress_vec(&data[10..], &mut decoded, FlushDecompress::Finish) - .is_ok()); - - // decompress data that has nothing to do with the deflate stream (this - // used to panic) - drop(d.decompress_vec(&[0], &mut decoded, FlushDecompress::None)); - } - - #[test] - fn reset() { - let string = "hello world".as_bytes(); - let mut zlib = Vec::new(); - let mut deflate = Vec::new(); - - let comp = Compression::default(); - write::ZlibEncoder::new(&mut zlib, comp) - .write_all(string) - .unwrap(); - write::DeflateEncoder::new(&mut deflate, comp) - .write_all(string) - .unwrap(); - - let mut dst = [0; 1024]; - let mut decoder = Decompress::new(true); - decoder - .decompress(&zlib, &mut dst, FlushDecompress::Finish) - .unwrap(); - assert_eq!(decoder.total_out(), string.len() as u64); - assert!(dst.starts_with(string)); - - decoder.reset(false); - decoder - .decompress(&deflate, &mut dst, FlushDecompress::Finish) - .unwrap(); - assert_eq!(decoder.total_out(), string.len() as u64); - assert!(dst.starts_with(string)); - } - - #[cfg(feature = "any_zlib")] - #[test] - fn set_dictionary_with_zlib_header() { - let string = "hello, hello!".as_bytes(); - let dictionary = "hello".as_bytes(); - - let mut encoded = Vec::with_capacity(1024); - - let mut encoder = Compress::new(Compression::default(), true); - - let dictionary_adler = encoder.set_dictionary(&dictionary).unwrap(); - - encoder - .compress_vec(string, &mut encoded, FlushCompress::Finish) - .unwrap(); - - assert_eq!(encoder.total_in(), string.len() as u64); - assert_eq!(encoder.total_out(), encoded.len() as u64); - - let mut decoder = Decompress::new(true); - let mut decoded = [0; 1024]; - let decompress_error = decoder - .decompress(&encoded, &mut decoded, FlushDecompress::Finish) - .expect_err("decompression should fail due to requiring a dictionary"); - - let required_adler = decompress_error.needs_dictionary() - .expect("the first call to decompress should indicate a dictionary is required along with the required Adler-32 
checksum"); - - assert_eq!(required_adler, dictionary_adler, - "the Adler-32 checksum should match the value when the dictionary was set on the compressor"); - - let actual_adler = decoder.set_dictionary(&dictionary).unwrap(); - - assert_eq!(required_adler, actual_adler); - - // Decompress the rest of the input to the remainder of the output buffer - let total_in = decoder.total_in(); - let total_out = decoder.total_out(); - - let decompress_result = decoder.decompress( - &encoded[total_in as usize..], - &mut decoded[total_out as usize..], - FlushDecompress::Finish, - ); - assert!(decompress_result.is_ok()); - - assert_eq!(&decoded[..decoder.total_out() as usize], string); - } - - #[cfg(feature = "any_zlib")] - #[test] - fn set_dictionary_raw() { - let string = "hello, hello!".as_bytes(); - let dictionary = "hello".as_bytes(); - - let mut encoded = Vec::with_capacity(1024); - - let mut encoder = Compress::new(Compression::default(), false); - - encoder.set_dictionary(&dictionary).unwrap(); - - encoder - .compress_vec(string, &mut encoded, FlushCompress::Finish) - .unwrap(); - - assert_eq!(encoder.total_in(), string.len() as u64); - assert_eq!(encoder.total_out(), encoded.len() as u64); - - let mut decoder = Decompress::new(false); - - decoder.set_dictionary(&dictionary).unwrap(); - - let mut decoded = [0; 1024]; - let decompress_result = decoder.decompress(&encoded, &mut decoded, FlushDecompress::Finish); - - assert!(decompress_result.is_ok()); - - assert_eq!(&decoded[..decoder.total_out() as usize], string); - } - - #[cfg(feature = "any_zlib")] - #[test] - fn test_gzip_flate() { - let string = "hello, hello!".as_bytes(); - - let mut encoded = Vec::with_capacity(1024); - - let mut encoder = Compress::new_gzip(Compression::default(), 9); - - encoder - .compress_vec(string, &mut encoded, FlushCompress::Finish) - .unwrap(); - - assert_eq!(encoder.total_in(), string.len() as u64); - assert_eq!(encoder.total_out(), encoded.len() as u64); - - let mut decoder = 
Decompress::new_gzip(9); - - let mut decoded = [0; 1024]; - decoder - .decompress(&encoded, &mut decoded, FlushDecompress::Finish) - .unwrap(); - - assert_eq!(&decoded[..decoder.total_out() as usize], string); - } - - #[cfg(feature = "any_zlib")] - #[test] - fn test_error_message() { - let mut decoder = Decompress::new(false); - let mut decoded = [0; 128]; - let garbage = b"xbvxzi"; - - let err = decoder - .decompress(&*garbage, &mut decoded, FlushDecompress::Finish) - .unwrap_err(); - - assert_eq!(err.message(), Some("invalid stored block lengths")); - } -} diff --git a/third_party/rust/flate2/v1/crate/src/zio.rs b/third_party/rust/flate2/v1/crate/src/zio.rs deleted file mode 100644 index 50beacbd0f9b..000000000000 --- a/third_party/rust/flate2/v1/crate/src/zio.rs +++ /dev/null @@ -1,288 +0,0 @@ -use std::io; -use std::io::prelude::*; -use std::mem; - -use crate::{Compress, Decompress, DecompressError, FlushCompress, FlushDecompress, Status}; - -#[derive(Debug)] -pub struct Writer { - obj: Option, - pub data: D, - buf: Vec, -} - -pub trait Ops { - type Flush: Flush; - fn total_in(&self) -> u64; - fn total_out(&self) -> u64; - fn run( - &mut self, - input: &[u8], - output: &mut [u8], - flush: Self::Flush, - ) -> Result; - fn run_vec( - &mut self, - input: &[u8], - output: &mut Vec, - flush: Self::Flush, - ) -> Result; -} - -impl Ops for Compress { - type Flush = FlushCompress; - fn total_in(&self) -> u64 { - self.total_in() - } - fn total_out(&self) -> u64 { - self.total_out() - } - fn run( - &mut self, - input: &[u8], - output: &mut [u8], - flush: FlushCompress, - ) -> Result { - Ok(self.compress(input, output, flush).unwrap()) - } - fn run_vec( - &mut self, - input: &[u8], - output: &mut Vec, - flush: FlushCompress, - ) -> Result { - Ok(self.compress_vec(input, output, flush).unwrap()) - } -} - -impl Ops for Decompress { - type Flush = FlushDecompress; - fn total_in(&self) -> u64 { - self.total_in() - } - fn total_out(&self) -> u64 { - self.total_out() - } - fn 
run( - &mut self, - input: &[u8], - output: &mut [u8], - flush: FlushDecompress, - ) -> Result { - self.decompress(input, output, flush) - } - fn run_vec( - &mut self, - input: &[u8], - output: &mut Vec, - flush: FlushDecompress, - ) -> Result { - self.decompress_vec(input, output, flush) - } -} - -pub trait Flush { - fn none() -> Self; - fn sync() -> Self; - fn finish() -> Self; -} - -impl Flush for FlushCompress { - fn none() -> Self { - FlushCompress::None - } - - fn sync() -> Self { - FlushCompress::Sync - } - - fn finish() -> Self { - FlushCompress::Finish - } -} - -impl Flush for FlushDecompress { - fn none() -> Self { - FlushDecompress::None - } - - fn sync() -> Self { - FlushDecompress::Sync - } - - fn finish() -> Self { - FlushDecompress::Finish - } -} - -pub fn read(obj: &mut R, data: &mut D, dst: &mut [u8]) -> io::Result -where - R: BufRead, - D: Ops, -{ - loop { - let (read, consumed, ret, eof); - { - let input = obj.fill_buf()?; - eof = input.is_empty(); - let before_out = data.total_out(); - let before_in = data.total_in(); - let flush = if eof { - D::Flush::finish() - } else { - D::Flush::none() - }; - ret = data.run(input, dst, flush); - read = (data.total_out() - before_out) as usize; - consumed = (data.total_in() - before_in) as usize; - } - obj.consume(consumed); - - match ret { - // If we haven't ready any data and we haven't hit EOF yet, - // then we need to keep asking for more data because if we - // return that 0 bytes of data have been read then it will - // be interpreted as EOF. - Ok(Status::Ok) | Ok(Status::BufError) if read == 0 && !eof && !dst.is_empty() => { - continue - } - Ok(Status::Ok) | Ok(Status::BufError) | Ok(Status::StreamEnd) => return Ok(read), - - Err(..) 
=> { - return Err(io::Error::new( - io::ErrorKind::InvalidInput, - "corrupt deflate stream", - )) - } - } - } -} - -impl Writer { - pub fn new(w: W, d: D) -> Writer { - Writer { - obj: Some(w), - data: d, - buf: Vec::with_capacity(32 * 1024), - } - } - - pub fn finish(&mut self) -> io::Result<()> { - loop { - self.dump()?; - - let before = self.data.total_out(); - self.data.run_vec(&[], &mut self.buf, D::Flush::finish())?; - if before == self.data.total_out() { - return Ok(()); - } - } - } - - pub fn replace(&mut self, w: W) -> W { - self.buf.truncate(0); - mem::replace(self.get_mut(), w) - } - - pub fn get_ref(&self) -> &W { - self.obj.as_ref().unwrap() - } - - pub fn get_mut(&mut self) -> &mut W { - self.obj.as_mut().unwrap() - } - - // Note that this should only be called if the outer object is just about - // to be consumed! - // - // (e.g. an implementation of `into_inner`) - pub fn take_inner(&mut self) -> W { - self.obj.take().unwrap() - } - - pub fn is_present(&self) -> bool { - self.obj.is_some() - } - - // Returns total written bytes and status of underlying codec - pub(crate) fn write_with_status(&mut self, buf: &[u8]) -> io::Result<(usize, Status)> { - // miniz isn't guaranteed to actually write any of the buffer provided, - // it may be in a flushing mode where it's just giving us data before - // we're actually giving it any data. We don't want to spuriously return - // `Ok(0)` when possible as it will cause calls to write_all() to fail. - // As a result we execute this in a loop to ensure that we try our - // darndest to write the data. 
- loop { - self.dump()?; - - let before_in = self.data.total_in(); - let ret = self.data.run_vec(buf, &mut self.buf, D::Flush::none()); - let written = (self.data.total_in() - before_in) as usize; - let is_stream_end = matches!(ret, Ok(Status::StreamEnd)); - - if !buf.is_empty() && written == 0 && ret.is_ok() && !is_stream_end { - continue; - } - return match ret { - Ok(st) => match st { - Status::Ok | Status::BufError | Status::StreamEnd => Ok((written, st)), - }, - Err(..) => Err(io::Error::new( - io::ErrorKind::InvalidInput, - "corrupt deflate stream", - )), - }; - } - } - - fn dump(&mut self) -> io::Result<()> { - // TODO: should manage this buffer not with `drain` but probably more of - // a deque-like strategy. - while !self.buf.is_empty() { - let n = self.obj.as_mut().unwrap().write(&self.buf)?; - if n == 0 { - return Err(io::ErrorKind::WriteZero.into()); - } - self.buf.drain(..n); - } - Ok(()) - } -} - -impl Write for Writer { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.write_with_status(buf).map(|res| res.0) - } - - fn flush(&mut self) -> io::Result<()> { - self.data - .run_vec(&[], &mut self.buf, D::Flush::sync()) - .unwrap(); - - // Unfortunately miniz doesn't actually tell us when we're done with - // pulling out all the data from the internal stream. To remedy this we - // have to continually ask the stream for more memory until it doesn't - // give us a chunk of memory the same size as our own internal buffer, - // at which point we assume it's reached the end. 
- loop { - self.dump()?; - let before = self.data.total_out(); - self.data - .run_vec(&[], &mut self.buf, D::Flush::none()) - .unwrap(); - if before == self.data.total_out() { - break; - } - } - - self.obj.as_mut().unwrap().flush() - } -} - -impl Drop for Writer { - fn drop(&mut self) { - if self.obj.is_some() { - let _ = self.finish(); - } - } -} diff --git a/third_party/rust/flate2/v1/crate/src/zlib/bufread.rs b/third_party/rust/flate2/v1/crate/src/zlib/bufread.rs deleted file mode 100644 index f1d32316510d..000000000000 --- a/third_party/rust/flate2/v1/crate/src/zlib/bufread.rs +++ /dev/null @@ -1,233 +0,0 @@ -use std::io; -use std::io::prelude::*; -use std::mem; - -use crate::zio; -use crate::{Compress, Decompress}; - -/// A ZLIB encoder, or compressor. -/// -/// This structure consumes a [`BufRead`] interface, reading uncompressed data -/// from the underlying reader, and emitting compressed data. -/// -/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html -/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// use flate2::Compression; -/// use flate2::bufread::ZlibEncoder; -/// use std::fs::File; -/// use std::io::BufReader; -/// -/// // Use a buffered file to compress contents into a Vec -/// -/// # fn open_hello_world() -> std::io::Result> { -/// let f = File::open("examples/hello_world.txt")?; -/// let b = BufReader::new(f); -/// let mut z = ZlibEncoder::new(b, Compression::fast()); -/// let mut buffer = Vec::new(); -/// z.read_to_end(&mut buffer)?; -/// # Ok(buffer) -/// # } -/// ``` -#[derive(Debug)] -pub struct ZlibEncoder { - obj: R, - data: Compress, -} - -impl ZlibEncoder { - /// Creates a new encoder which will read uncompressed data from the given - /// stream and emit the compressed stream. 
- pub fn new(r: R, level: crate::Compression) -> ZlibEncoder { - ZlibEncoder { - obj: r, - data: Compress::new(level, true), - } - } -} - -pub fn reset_encoder_data(zlib: &mut ZlibEncoder) { - zlib.data.reset() -} - -impl ZlibEncoder { - /// Resets the state of this encoder entirely, swapping out the input - /// stream for another. - /// - /// This function will reset the internal state of this encoder and replace - /// the input stream with the one provided, returning the previous input - /// stream. Future data read from this encoder will be the compressed - /// version of `r`'s data. - pub fn reset(&mut self, r: R) -> R { - reset_encoder_data(self); - mem::replace(&mut self.obj, r) - } - - /// Acquires a reference to the underlying reader - pub fn get_ref(&self) -> &R { - &self.obj - } - - /// Acquires a mutable reference to the underlying stream - /// - /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. - pub fn get_mut(&mut self) -> &mut R { - &mut self.obj - } - - /// Consumes this encoder, returning the underlying reader. - pub fn into_inner(self) -> R { - self.obj - } - - /// Returns the number of bytes that have been read into this compressor. - /// - /// Note that not all bytes read from the underlying object may be accounted - /// for, there may still be some active buffering. - pub fn total_in(&self) -> u64 { - self.data.total_in() - } - - /// Returns the number of bytes that the compressor has produced. - /// - /// Note that not all bytes may have been read yet, some may still be - /// buffered. 
- pub fn total_out(&self) -> u64 { - self.data.total_out() - } -} - -impl Read for ZlibEncoder { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - zio::read(&mut self.obj, &mut self.data, buf) - } -} - -impl Write for ZlibEncoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.get_mut().write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.get_mut().flush() - } -} - -/// A ZLIB decoder, or decompressor. -/// -/// This structure consumes a [`BufRead`] interface, reading compressed data -/// from the underlying reader, and emitting uncompressed data. -/// -/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html -/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// use std::io; -/// # use flate2::Compression; -/// # use flate2::write::ZlibEncoder; -/// use flate2::bufread::ZlibDecoder; -/// -/// # fn main() { -/// # let mut e = ZlibEncoder::new(Vec::new(), Compression::default()); -/// # e.write_all(b"Hello World").unwrap(); -/// # let bytes = e.finish().unwrap(); -/// # println!("{}", decode_bufreader(bytes).unwrap()); -/// # } -/// # -/// // Uncompresses a Zlib Encoded vector of bytes and returns a string or error -/// // Here &[u8] implements BufRead -/// -/// fn decode_bufreader(bytes: Vec) -> io::Result { -/// let mut z = ZlibDecoder::new(&bytes[..]); -/// let mut s = String::new(); -/// z.read_to_string(&mut s)?; -/// Ok(s) -/// } -/// ``` -#[derive(Debug)] -pub struct ZlibDecoder { - obj: R, - data: Decompress, -} - -impl ZlibDecoder { - /// Creates a new decoder which will decompress data read from the given - /// stream. - pub fn new(r: R) -> ZlibDecoder { - ZlibDecoder { - obj: r, - data: Decompress::new(true), - } - } -} - -pub fn reset_decoder_data(zlib: &mut ZlibDecoder) { - zlib.data = Decompress::new(true); -} - -impl ZlibDecoder { - /// Resets the state of this decoder entirely, swapping out the input - /// stream for another. 
- /// - /// This will reset the internal state of this decoder and replace the - /// input stream with the one provided, returning the previous input - /// stream. Future data read from this decoder will be the decompressed - /// version of `r`'s data. - pub fn reset(&mut self, r: R) -> R { - reset_decoder_data(self); - mem::replace(&mut self.obj, r) - } - - /// Acquires a reference to the underlying stream - pub fn get_ref(&self) -> &R { - &self.obj - } - - /// Acquires a mutable reference to the underlying stream - /// - /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. - pub fn get_mut(&mut self) -> &mut R { - &mut self.obj - } - - /// Consumes this decoder, returning the underlying reader. - pub fn into_inner(self) -> R { - self.obj - } - - /// Returns the number of bytes that the decompressor has consumed. - /// - /// Note that this will likely be smaller than what the decompressor - /// actually read from the underlying stream due to buffering. - pub fn total_in(&self) -> u64 { - self.data.total_in() - } - - /// Returns the number of bytes that the decompressor has produced. 
- pub fn total_out(&self) -> u64 { - self.data.total_out() - } -} - -impl Read for ZlibDecoder { - fn read(&mut self, into: &mut [u8]) -> io::Result { - zio::read(&mut self.obj, &mut self.data, into) - } -} - -impl Write for ZlibDecoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.get_mut().write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.get_mut().flush() - } -} diff --git a/third_party/rust/flate2/v1/crate/src/zlib/mod.rs b/third_party/rust/flate2/v1/crate/src/zlib/mod.rs deleted file mode 100644 index 9d3de95c54c7..000000000000 --- a/third_party/rust/flate2/v1/crate/src/zlib/mod.rs +++ /dev/null @@ -1,159 +0,0 @@ -pub mod bufread; -pub mod read; -pub mod write; - -#[cfg(test)] -mod tests { - use std::io; - use std::io::prelude::*; - - use rand::{thread_rng, Rng}; - - use crate::zlib::{read, write}; - use crate::Compression; - - #[test] - fn roundtrip() { - let mut real = Vec::new(); - let mut w = write::ZlibEncoder::new(Vec::new(), Compression::default()); - let v = crate::random_bytes().take(1024).collect::>(); - for _ in 0..200 { - let to_write = &v[..thread_rng().gen_range(0..v.len())]; - real.extend(to_write.iter().map(|x| *x)); - w.write_all(to_write).unwrap(); - } - let result = w.finish().unwrap(); - let mut r = read::ZlibDecoder::new(&result[..]); - let mut ret = Vec::new(); - r.read_to_end(&mut ret).unwrap(); - assert!(ret == real); - } - - #[test] - fn drop_writes() { - let mut data = Vec::new(); - write::ZlibEncoder::new(&mut data, Compression::default()) - .write_all(b"foo") - .unwrap(); - let mut r = read::ZlibDecoder::new(&data[..]); - let mut ret = Vec::new(); - r.read_to_end(&mut ret).unwrap(); - assert!(ret == b"foo"); - } - - #[test] - fn total_in() { - let mut real = Vec::new(); - let mut w = write::ZlibEncoder::new(Vec::new(), Compression::default()); - let v = crate::random_bytes().take(1024).collect::>(); - for _ in 0..200 { - let to_write = &v[..thread_rng().gen_range(0..v.len())]; - 
real.extend(to_write.iter().map(|x| *x)); - w.write_all(to_write).unwrap(); - } - let mut result = w.finish().unwrap(); - - let result_len = result.len(); - - for _ in 0..200 { - result.extend(v.iter().map(|x| *x)); - } - - let mut r = read::ZlibDecoder::new(&result[..]); - let mut ret = Vec::new(); - r.read_to_end(&mut ret).unwrap(); - assert!(ret == real); - assert_eq!(r.total_in(), result_len as u64); - } - - #[test] - fn roundtrip2() { - let v = crate::random_bytes().take(1024 * 1024).collect::>(); - let mut r = read::ZlibDecoder::new(read::ZlibEncoder::new(&v[..], Compression::default())); - let mut ret = Vec::new(); - r.read_to_end(&mut ret).unwrap(); - assert_eq!(ret, v); - } - - #[test] - fn roundtrip3() { - let v = crate::random_bytes().take(1024 * 1024).collect::>(); - let mut w = - write::ZlibEncoder::new(write::ZlibDecoder::new(Vec::new()), Compression::default()); - w.write_all(&v).unwrap(); - let w = w.finish().unwrap().finish().unwrap(); - assert!(w == v); - } - - #[test] - fn reset_decoder() { - let v = crate::random_bytes().take(1024 * 1024).collect::>(); - let mut w = write::ZlibEncoder::new(Vec::new(), Compression::default()); - w.write_all(&v).unwrap(); - let data = w.finish().unwrap(); - - { - let (mut a, mut b, mut c) = (Vec::new(), Vec::new(), Vec::new()); - let mut r = read::ZlibDecoder::new(&data[..]); - r.read_to_end(&mut a).unwrap(); - r.reset(&data); - r.read_to_end(&mut b).unwrap(); - - let mut r = read::ZlibDecoder::new(&data[..]); - r.read_to_end(&mut c).unwrap(); - assert!(a == b && b == c && c == v); - } - - { - let mut w = write::ZlibDecoder::new(Vec::new()); - w.write_all(&data).unwrap(); - let a = w.reset(Vec::new()).unwrap(); - w.write_all(&data).unwrap(); - let b = w.finish().unwrap(); - - let mut w = write::ZlibDecoder::new(Vec::new()); - w.write_all(&data).unwrap(); - let c = w.finish().unwrap(); - assert!(a == b && b == c && c == v); - } - } - - #[test] - fn bad_input() { - // regress tests: previously caused a panic on drop 
- let mut out: Vec = Vec::new(); - let data: Vec = (0..255).cycle().take(1024).collect(); - let mut w = write::ZlibDecoder::new(&mut out); - match w.write_all(&data[..]) { - Ok(_) => panic!("Expected an error to be returned!"), - Err(e) => assert_eq!(e.kind(), io::ErrorKind::InvalidInput), - } - } - - #[test] - fn qc_reader() { - ::quickcheck::quickcheck(test as fn(_) -> _); - - fn test(v: Vec) -> bool { - let mut r = - read::ZlibDecoder::new(read::ZlibEncoder::new(&v[..], Compression::default())); - let mut v2 = Vec::new(); - r.read_to_end(&mut v2).unwrap(); - v == v2 - } - } - - #[test] - fn qc_writer() { - ::quickcheck::quickcheck(test as fn(_) -> _); - - fn test(v: Vec) -> bool { - let mut w = write::ZlibEncoder::new( - write::ZlibDecoder::new(Vec::new()), - Compression::default(), - ); - w.write_all(&v).unwrap(); - v == w.finish().unwrap().finish().unwrap() - } - } -} diff --git a/third_party/rust/flate2/v1/crate/src/zlib/read.rs b/third_party/rust/flate2/v1/crate/src/zlib/read.rs deleted file mode 100644 index 509493166536..000000000000 --- a/third_party/rust/flate2/v1/crate/src/zlib/read.rs +++ /dev/null @@ -1,240 +0,0 @@ -use std::io; -use std::io::prelude::*; - -use super::bufread; -use crate::bufreader::BufReader; - -/// A ZLIB encoder, or compressor. -/// -/// This structure implements a [`Read`] interface and will read uncompressed -/// data from an underlying stream and emit a stream of compressed data. 
-/// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html -/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// use flate2::Compression; -/// use flate2::read::ZlibEncoder; -/// use std::fs::File; -/// -/// // Open example file and compress the contents using Read interface -/// -/// # fn open_hello_world() -> std::io::Result> { -/// let f = File::open("examples/hello_world.txt")?; -/// let mut z = ZlibEncoder::new(f, Compression::fast()); -/// let mut buffer = [0;50]; -/// let byte_count = z.read(&mut buffer)?; -/// # Ok(buffer[0..byte_count].to_vec()) -/// # } -/// ``` -#[derive(Debug)] -pub struct ZlibEncoder { - inner: bufread::ZlibEncoder>, -} - -impl ZlibEncoder { - /// Creates a new encoder which will read uncompressed data from the given - /// stream and emit the compressed stream. - pub fn new(r: R, level: crate::Compression) -> ZlibEncoder { - ZlibEncoder { - inner: bufread::ZlibEncoder::new(BufReader::new(r), level), - } - } -} - -impl ZlibEncoder { - /// Resets the state of this encoder entirely, swapping out the input - /// stream for another. - /// - /// This function will reset the internal state of this encoder and replace - /// the input stream with the one provided, returning the previous input - /// stream. Future data read from this encoder will be the compressed - /// version of `r`'s data. - /// - /// Note that there may be currently buffered data when this function is - /// called, and in that case the buffered data is discarded. - pub fn reset(&mut self, r: R) -> R { - super::bufread::reset_encoder_data(&mut self.inner); - self.inner.get_mut().reset(r) - } - - /// Acquires a reference to the underlying stream - pub fn get_ref(&self) -> &R { - self.inner.get_ref().get_ref() - } - - /// Acquires a mutable reference to the underlying stream - /// - /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. 
- pub fn get_mut(&mut self) -> &mut R { - self.inner.get_mut().get_mut() - } - - /// Consumes this encoder, returning the underlying reader. - /// - /// Note that there may be buffered bytes which are not re-acquired as part - /// of this transition. It's recommended to only call this function after - /// EOF has been reached. - pub fn into_inner(self) -> R { - self.inner.into_inner().into_inner() - } - - /// Returns the number of bytes that have been read into this compressor. - /// - /// Note that not all bytes read from the underlying object may be accounted - /// for, there may still be some active buffering. - pub fn total_in(&self) -> u64 { - self.inner.total_in() - } - - /// Returns the number of bytes that the compressor has produced. - /// - /// Note that not all bytes may have been read yet, some may still be - /// buffered. - pub fn total_out(&self) -> u64 { - self.inner.total_out() - } -} - -impl Read for ZlibEncoder { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.inner.read(buf) - } -} - -impl Write for ZlibEncoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.get_mut().write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.get_mut().flush() - } -} - -/// A ZLIB decoder, or decompressor. -/// -/// This structure implements a [`Read`] interface and takes a stream of -/// compressed data as input, providing the decompressed data when read from. 
-/// -/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html -/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// use std::io; -/// # use flate2::Compression; -/// # use flate2::write::ZlibEncoder; -/// use flate2::read::ZlibDecoder; -/// -/// # fn main() { -/// # let mut e = ZlibEncoder::new(Vec::new(), Compression::default()); -/// # e.write_all(b"Hello World").unwrap(); -/// # let bytes = e.finish().unwrap(); -/// # println!("{}", decode_reader(bytes).unwrap()); -/// # } -/// # -/// // Uncompresses a Zlib Encoded vector of bytes and returns a string or error -/// // Here &[u8] implements Read -/// -/// fn decode_reader(bytes: Vec) -> io::Result { -/// let mut z = ZlibDecoder::new(&bytes[..]); -/// let mut s = String::new(); -/// z.read_to_string(&mut s)?; -/// Ok(s) -/// } -/// ``` -#[derive(Debug)] -pub struct ZlibDecoder { - inner: bufread::ZlibDecoder>, -} - -impl ZlibDecoder { - /// Creates a new decoder which will decompress data read from the given - /// stream. - pub fn new(r: R) -> ZlibDecoder { - ZlibDecoder::new_with_buf(r, vec![0; 32 * 1024]) - } - - /// Same as `new`, but the intermediate buffer for data is specified. - /// - /// Note that the specified buffer will only be used up to its current - /// length. The buffer's capacity will also not grow over time. - pub fn new_with_buf(r: R, buf: Vec) -> ZlibDecoder { - ZlibDecoder { - inner: bufread::ZlibDecoder::new(BufReader::with_buf(buf, r)), - } - } -} - -impl ZlibDecoder { - /// Resets the state of this decoder entirely, swapping out the input - /// stream for another. - /// - /// This will reset the internal state of this decoder and replace the - /// input stream with the one provided, returning the previous input - /// stream. Future data read from this decoder will be the decompressed - /// version of `r`'s data. - /// - /// Note that there may be currently buffered data when this function is - /// called, and in that case the buffered data is discarded. 
- pub fn reset(&mut self, r: R) -> R { - super::bufread::reset_decoder_data(&mut self.inner); - self.inner.get_mut().reset(r) - } - - /// Acquires a reference to the underlying stream - pub fn get_ref(&self) -> &R { - self.inner.get_ref().get_ref() - } - - /// Acquires a mutable reference to the underlying stream - /// - /// Note that mutation of the stream may result in surprising results if - /// this encoder is continued to be used. - pub fn get_mut(&mut self) -> &mut R { - self.inner.get_mut().get_mut() - } - - /// Consumes this decoder, returning the underlying reader. - /// - /// Note that there may be buffered bytes which are not re-acquired as part - /// of this transition. It's recommended to only call this function after - /// EOF has been reached. - pub fn into_inner(self) -> R { - self.inner.into_inner().into_inner() - } - - /// Returns the number of bytes that the decompressor has consumed. - /// - /// Note that this will likely be smaller than what the decompressor - /// actually read from the underlying stream due to buffering. - pub fn total_in(&self) -> u64 { - self.inner.total_in() - } - - /// Returns the number of bytes that the decompressor has produced. - pub fn total_out(&self) -> u64 { - self.inner.total_out() - } -} - -impl Read for ZlibDecoder { - fn read(&mut self, into: &mut [u8]) -> io::Result { - self.inner.read(into) - } -} - -impl Write for ZlibDecoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.get_mut().write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.get_mut().flush() - } -} diff --git a/third_party/rust/flate2/v1/crate/src/zlib/write.rs b/third_party/rust/flate2/v1/crate/src/zlib/write.rs deleted file mode 100644 index c67181402b4c..000000000000 --- a/third_party/rust/flate2/v1/crate/src/zlib/write.rs +++ /dev/null @@ -1,321 +0,0 @@ -use std::io; -use std::io::prelude::*; - -use crate::zio; -use crate::{Compress, Decompress}; - -/// A ZLIB encoder, or compressor. 
-/// -/// This structure implements a [`Write`] interface and takes a stream of -/// uncompressed data, writing the compressed data to the wrapped writer. -/// -/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html -/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// use flate2::Compression; -/// use flate2::write::ZlibEncoder; -/// -/// // Vec implements Write, assigning the compressed bytes of sample string -/// -/// # fn zlib_encoding() -> std::io::Result<()> { -/// let mut e = ZlibEncoder::new(Vec::new(), Compression::default()); -/// e.write_all(b"Hello World")?; -/// let compressed = e.finish()?; -/// # Ok(()) -/// # } -/// ``` -#[derive(Debug)] -pub struct ZlibEncoder { - inner: zio::Writer, -} - -impl ZlibEncoder { - /// Creates a new encoder which will write compressed data to the stream - /// given at the given compression level. - /// - /// When this encoder is dropped or unwrapped the final pieces of data will - /// be flushed. - pub fn new(w: W, level: crate::Compression) -> ZlibEncoder { - ZlibEncoder { - inner: zio::Writer::new(w, Compress::new(level, true)), - } - } - - /// Acquires a reference to the underlying writer. - pub fn get_ref(&self) -> &W { - self.inner.get_ref() - } - - /// Acquires a mutable reference to the underlying writer. - /// - /// Note that mutating the output/input state of the stream may corrupt this - /// object, so care must be taken when using this method. - pub fn get_mut(&mut self) -> &mut W { - self.inner.get_mut() - } - - /// Resets the state of this encoder entirely, swapping out the output - /// stream for another. - /// - /// This function will finish encoding the current stream into the current - /// output stream before swapping out the two output streams. - /// - /// After the current stream has been finished, this will reset the internal - /// state of this encoder and replace the output stream with the one - /// provided, returning the previous output stream. 
Future data written to - /// this encoder will be the compressed into the stream `w` provided. - /// - /// # Errors - /// - /// This function will perform I/O to complete this stream, and any I/O - /// errors which occur will be returned from this function. - pub fn reset(&mut self, w: W) -> io::Result { - self.inner.finish()?; - self.inner.data.reset(); - Ok(self.inner.replace(w)) - } - - /// Attempt to finish this output stream, writing out final chunks of data. - /// - /// Note that this function can only be used once data has finished being - /// written to the output stream. After this function is called then further - /// calls to `write` may result in a panic. - /// - /// # Panics - /// - /// Attempts to write data to this stream may result in a panic after this - /// function is called. - /// - /// # Errors - /// - /// This function will perform I/O to complete this stream, and any I/O - /// errors which occur will be returned from this function. - pub fn try_finish(&mut self) -> io::Result<()> { - self.inner.finish() - } - - /// Consumes this encoder, flushing the output stream. - /// - /// This will flush the underlying data stream, close off the compressed - /// stream and, if successful, return the contained writer. - /// - /// Note that this function may not be suitable to call in a situation where - /// the underlying stream is an asynchronous I/O stream. To finish a stream - /// the `try_finish` (or `shutdown`) method should be used instead. To - /// re-acquire ownership of a stream it is safe to call this method after - /// `try_finish` or `shutdown` has returned `Ok`. - /// - /// # Errors - /// - /// This function will perform I/O to complete this stream, and any I/O - /// errors which occur will be returned from this function. - pub fn finish(mut self) -> io::Result { - self.inner.finish()?; - Ok(self.inner.take_inner()) - } - - /// Consumes this encoder, flushing the output stream. 
- /// - /// This will flush the underlying data stream and then return the contained - /// writer if the flush succeeded. - /// The compressed stream will not closed but only flushed. This - /// means that obtained byte array can by extended by another deflated - /// stream. To close the stream add the two bytes 0x3 and 0x0. - /// - /// # Errors - /// - /// This function will perform I/O to complete this stream, and any I/O - /// errors which occur will be returned from this function. - pub fn flush_finish(mut self) -> io::Result { - self.inner.flush()?; - Ok(self.inner.take_inner()) - } - - /// Returns the number of bytes that have been written to this compressor. - /// - /// Note that not all bytes written to this object may be accounted for, - /// there may still be some active buffering. - pub fn total_in(&self) -> u64 { - self.inner.data.total_in() - } - - /// Returns the number of bytes that the compressor has produced. - /// - /// Note that not all bytes may have been written yet, some may still be - /// buffered. - pub fn total_out(&self) -> u64 { - self.inner.data.total_out() - } -} - -impl Write for ZlibEncoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.inner.write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.inner.flush() - } -} - -impl Read for ZlibEncoder { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.get_mut().read(buf) - } -} - -/// A ZLIB decoder, or decompressor. -/// -/// This structure implements a [`Write`] and will emit a stream of decompressed -/// data when fed a stream of compressed data. 
-/// -/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html -/// -/// # Examples -/// -/// ``` -/// use std::io::prelude::*; -/// use std::io; -/// # use flate2::Compression; -/// # use flate2::write::ZlibEncoder; -/// use flate2::write::ZlibDecoder; -/// -/// # fn main() { -/// # let mut e = ZlibEncoder::new(Vec::new(), Compression::default()); -/// # e.write_all(b"Hello World").unwrap(); -/// # let bytes = e.finish().unwrap(); -/// # println!("{}", decode_reader(bytes).unwrap()); -/// # } -/// # -/// // Uncompresses a Zlib Encoded vector of bytes and returns a string or error -/// // Here Vec implements Write -/// -/// fn decode_reader(bytes: Vec) -> io::Result { -/// let mut writer = Vec::new(); -/// let mut z = ZlibDecoder::new(writer); -/// z.write_all(&bytes[..])?; -/// writer = z.finish()?; -/// let return_string = String::from_utf8(writer).expect("String parsing error"); -/// Ok(return_string) -/// } -/// ``` -#[derive(Debug)] -pub struct ZlibDecoder { - inner: zio::Writer, -} - -impl ZlibDecoder { - /// Creates a new decoder which will write uncompressed data to the stream. - /// - /// When this decoder is dropped or unwrapped the final pieces of data will - /// be flushed. - pub fn new(w: W) -> ZlibDecoder { - ZlibDecoder { - inner: zio::Writer::new(w, Decompress::new(true)), - } - } - - /// Acquires a reference to the underlying writer. - pub fn get_ref(&self) -> &W { - self.inner.get_ref() - } - - /// Acquires a mutable reference to the underlying writer. - /// - /// Note that mutating the output/input state of the stream may corrupt this - /// object, so care must be taken when using this method. - pub fn get_mut(&mut self) -> &mut W { - self.inner.get_mut() - } - - /// Resets the state of this decoder entirely, swapping out the output - /// stream for another. - /// - /// This will reset the internal state of this decoder and replace the - /// output stream with the one provided, returning the previous output - /// stream. 
Future data written to this decoder will be decompressed into - /// the output stream `w`. - /// - /// # Errors - /// - /// This function will perform I/O to complete this stream, and any I/O - /// errors which occur will be returned from this function. - pub fn reset(&mut self, w: W) -> io::Result { - self.inner.finish()?; - self.inner.data = Decompress::new(true); - Ok(self.inner.replace(w)) - } - - /// Attempt to finish this output stream, writing out final chunks of data. - /// - /// Note that this function can only be used once data has finished being - /// written to the output stream. After this function is called then further - /// calls to `write` may result in a panic. - /// - /// # Panics - /// - /// Attempts to write data to this stream may result in a panic after this - /// function is called. - /// - /// # Errors - /// - /// This function will perform I/O to complete this stream, and any I/O - /// errors which occur will be returned from this function. - pub fn try_finish(&mut self) -> io::Result<()> { - self.inner.finish() - } - - /// Consumes this encoder, flushing the output stream. - /// - /// This will flush the underlying data stream and then return the contained - /// writer if the flush succeeded. - /// - /// Note that this function may not be suitable to call in a situation where - /// the underlying stream is an asynchronous I/O stream. To finish a stream - /// the `try_finish` (or `shutdown`) method should be used instead. To - /// re-acquire ownership of a stream it is safe to call this method after - /// `try_finish` or `shutdown` has returned `Ok`. - /// - /// # Errors - /// - /// This function will perform I/O to complete this stream, and any I/O - /// errors which occur will be returned from this function. - pub fn finish(mut self) -> io::Result { - self.inner.finish()?; - Ok(self.inner.take_inner()) - } - - /// Returns the number of bytes that the decompressor has consumed for - /// decompression. 
- /// - /// Note that this will likely be smaller than the number of bytes - /// successfully written to this stream due to internal buffering. - pub fn total_in(&self) -> u64 { - self.inner.data.total_in() - } - - /// Returns the number of bytes that the decompressor has written to its - /// output stream. - pub fn total_out(&self) -> u64 { - self.inner.data.total_out() - } -} - -impl Write for ZlibDecoder { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.inner.write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.inner.flush() - } -} - -impl Read for ZlibDecoder { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.inner.get_mut().read(buf) - } -} diff --git a/third_party/rust/flate2/v1/crate/tests/corrupt-gz-file.bin b/third_party/rust/flate2/v1/crate/tests/corrupt-gz-file.bin deleted file mode 100644 index 159333b03275..000000000000 Binary files a/third_party/rust/flate2/v1/crate/tests/corrupt-gz-file.bin and /dev/null differ diff --git a/third_party/rust/flate2/v1/crate/tests/early-flush.rs b/third_party/rust/flate2/v1/crate/tests/early-flush.rs deleted file mode 100644 index e717adaa5243..000000000000 --- a/third_party/rust/flate2/v1/crate/tests/early-flush.rs +++ /dev/null @@ -1,20 +0,0 @@ -extern crate flate2; - -use std::io::{Read, Write}; - -use flate2::read::GzDecoder; -use flate2::write::GzEncoder; - -#[test] -fn smoke() { - let mut w = GzEncoder::new(Vec::new(), flate2::Compression::default()); - w.flush().unwrap(); - w.write_all(b"hello").unwrap(); - - let bytes = w.finish().unwrap(); - - let mut r = GzDecoder::new(&bytes[..]); - let mut s = String::new(); - r.read_to_string(&mut s).unwrap(); - assert_eq!(s, "hello"); -} diff --git a/third_party/rust/flate2/v1/crate/tests/empty-read.rs b/third_party/rust/flate2/v1/crate/tests/empty-read.rs deleted file mode 100644 index 755123833652..000000000000 --- a/third_party/rust/flate2/v1/crate/tests/empty-read.rs +++ /dev/null @@ -1,82 +0,0 @@ -extern crate flate2; - -use 
std::io::{Read, Write}; - -#[test] -fn deflate_decoder_empty_read() { - let original: &[u8] = b"Lorem ipsum dolor sit amet."; - let mut encoder = - flate2::write::DeflateEncoder::new(Vec::new(), flate2::Compression::default()); - encoder.write_all(original).unwrap(); - let encoded: Vec = encoder.finish().unwrap(); - let mut decoder = flate2::read::DeflateDecoder::new(encoded.as_slice()); - assert_eq!(decoder.read(&mut []).unwrap(), 0); - let mut decoded = Vec::new(); - decoder.read_to_end(&mut decoded).unwrap(); - assert_eq!(decoded.as_slice(), original); -} - -#[test] -fn deflate_encoder_empty_read() { - let original: &[u8] = b"Lorem ipsum dolor sit amet."; - let mut encoder = flate2::read::DeflateEncoder::new(original, flate2::Compression::default()); - assert_eq!(encoder.read(&mut []).unwrap(), 0); - let mut encoded = Vec::new(); - encoder.read_to_end(&mut encoded).unwrap(); - let mut decoder = flate2::read::DeflateDecoder::new(encoded.as_slice()); - let mut decoded = Vec::new(); - decoder.read_to_end(&mut decoded).unwrap(); - assert_eq!(decoded.as_slice(), original); -} - -#[test] -fn gzip_decoder_empty_read() { - let original: &[u8] = b"Lorem ipsum dolor sit amet."; - let mut encoder = flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::default()); - encoder.write_all(original).unwrap(); - let encoded: Vec = encoder.finish().unwrap(); - let mut decoder = flate2::read::GzDecoder::new(encoded.as_slice()); - assert_eq!(decoder.read(&mut []).unwrap(), 0); - let mut decoded = Vec::new(); - decoder.read_to_end(&mut decoded).unwrap(); - assert_eq!(decoded.as_slice(), original); -} - -#[test] -fn gzip_encoder_empty_read() { - let original: &[u8] = b"Lorem ipsum dolor sit amet."; - let mut encoder = flate2::read::GzEncoder::new(original, flate2::Compression::default()); - assert_eq!(encoder.read(&mut []).unwrap(), 0); - let mut encoded = Vec::new(); - encoder.read_to_end(&mut encoded).unwrap(); - let mut decoder = 
flate2::read::GzDecoder::new(encoded.as_slice()); - let mut decoded = Vec::new(); - decoder.read_to_end(&mut decoded).unwrap(); - assert_eq!(decoded.as_slice(), original); -} - -#[test] -fn zlib_decoder_empty_read() { - let original: &[u8] = b"Lorem ipsum dolor sit amet."; - let mut encoder = flate2::write::ZlibEncoder::new(Vec::new(), flate2::Compression::default()); - encoder.write_all(original).unwrap(); - let encoded: Vec = encoder.finish().unwrap(); - let mut decoder = flate2::read::ZlibDecoder::new(encoded.as_slice()); - assert_eq!(decoder.read(&mut []).unwrap(), 0); - let mut decoded = Vec::new(); - decoder.read_to_end(&mut decoded).unwrap(); - assert_eq!(decoded.as_slice(), original); -} - -#[test] -fn zlib_encoder_empty_read() { - let original: &[u8] = b"Lorem ipsum dolor sit amet."; - let mut encoder = flate2::read::ZlibEncoder::new(original, flate2::Compression::default()); - assert_eq!(encoder.read(&mut []).unwrap(), 0); - let mut encoded = Vec::new(); - encoder.read_to_end(&mut encoded).unwrap(); - let mut decoder = flate2::read::ZlibDecoder::new(encoded.as_slice()); - let mut decoded = Vec::new(); - decoder.read_to_end(&mut decoded).unwrap(); - assert_eq!(decoded.as_slice(), original); -} diff --git a/third_party/rust/flate2/v1/crate/tests/good-file.gz b/third_party/rust/flate2/v1/crate/tests/good-file.gz deleted file mode 100644 index f968689cce01..000000000000 Binary files a/third_party/rust/flate2/v1/crate/tests/good-file.gz and /dev/null differ diff --git a/third_party/rust/flate2/v1/crate/tests/good-file.txt b/third_party/rust/flate2/v1/crate/tests/good-file.txt deleted file mode 100644 index ee39ac53dbd2..000000000000 --- a/third_party/rust/flate2/v1/crate/tests/good-file.txt +++ /dev/null @@ -1,733 +0,0 @@ -## ## -timestep simulated EIR patent hosts -0 0.136402 16855 -1 0.146872 18564 -2 0.150157 20334 -3 0.146358 22159 -4 0.136315 23655 -5 0.122354 24848 -6 0.104753 25887 -7 0.084439 26770 -8 0.06417 27238 -9 0.0450397 27349 -10 0.0295473 
27274 -11 0.0184662 26909 -12 0.0110032 26324 -13 0.00634348 25513 -14 0.0036144 24469 -15 0.00208133 23383 -16 0.00122468 22345 -17 0.000752514 21342 -18 0.000545333 20416 -19 0.000546139 19657 -20 0.00054572 18806 -21 0.000545757 18015 -22 0.000545898 17349 -23 0.000546719 16594 -24 0.000547353 15955 -25 0.000547944 15374 -26 0.000547606 14765 -27 0.000594773 14212 -28 0.000969163 13677 -29 0.00168295 13180 -30 0.003059 12760 -31 0.00571599 12313 -32 0.0107918 11896 -33 0.0201943 11512 -34 0.0368013 11340 -35 0.0640629 11323 -36 0.104447 11769 -37 0.157207 12728 -38 0.216682 14261 -39 0.271159 16491 -40 0.303552 19274 -41 0.303678 22157 -42 0.271945 24875 -43 0.215445 27027 -44 0.154503 28690 -45 0.100717 30046 -46 0.0600343 30602 -47 0.0328576 30709 -48 0.016964 30315 -49 0.00841526 29310 -50 0.0040958 28058 -51 0.0019953 26662 -52 0.000986531 25259 -53 0.000545786 24049 -54 0.000546405 22966 -55 0.000546036 21933 -56 0.00054427 20953 -57 0.000542769 20057 -58 0.000541566 19304 -59 0.000541822 18477 -60 0.000541643 17695 -61 0.000541989 17002 -62 0.000769298 16391 -63 0.00150811 15805 -64 0.00295097 15172 -65 0.00566197 14690 -66 0.0105243 14206 -67 0.0186965 13791 -68 0.0313363 13470 -69 0.0490605 13377 -70 0.0711679 13631 -71 0.0953625 14209 -72 0.118026 15277 -73 0.134612 16760 -74 0.144311 18339 -75 0.146328 20124 -76 0.142936 21803 -77 0.134029 23435 -78 0.120562 24854 -79 0.103157 25880 -80 0.0834054 26597 -81 0.0632474 27226 -82 0.0447785 27294 -83 0.0295654 27169 -84 0.0184081 26803 -85 0.0109489 26265 -86 0.00631234 25375 -87 0.00359978 24306 -88 0.00206967 23260 -89 0.00122197 22225 -90 0.000751031 21277 -91 0.000544507 20295 -92 0.000543897 19417 -93 0.000543483 18623 -94 0.000542926 17837 -95 0.000542685 17070 -96 0.000542387 16424 -97 0.000541194 15838 -98 0.000540427 15177 -99 0.000540774 14608 -100 0.000588312 14066 -101 0.000959183 13499 -102 0.00166774 12979 -103 0.00303278 12545 -104 0.00567457 12067 -105 0.0107272 11712 -106 0.0200606 11368 
-107 0.0364637 11207 -108 0.063339 11238 -109 0.103717 11660 -110 0.156884 12621 -111 0.217072 14151 -112 0.272311 16358 -113 0.305046 19005 -114 0.304927 21926 -115 0.272427 24662 -116 0.216478 27080 -117 0.155168 29064 -118 0.10079 30370 -119 0.0599659 30992 -120 0.0331287 30975 -121 0.017235 30317 -122 0.00860221 29455 -123 0.00419286 28172 -124 0.00203361 26809 -125 0.000998847 25476 -126 0.000551418 24230 -127 0.000551119 23106 -128 0.000552786 22147 -129 0.000553814 21183 -130 0.000553743 20280 -131 0.000554428 19423 -132 0.000555022 18598 -133 0.000555921 17864 -134 0.000556687 17187 -135 0.000789996 16527 -136 0.00154597 15870 -137 0.00302776 15226 -138 0.00581484 14685 -139 0.010812 14234 -140 0.0191832 13818 -141 0.0321572 13571 -142 0.050328 13538 -143 0.072817 13812 -144 0.0974321 14368 -145 0.120225 15436 -146 0.137418 16988 -147 0.147086 18775 -148 0.149165 20563 -149 0.144943 22223 -150 0.136631 23741 -151 0.123355 24920 -152 0.105401 25779 -153 0.0851918 26781 -154 0.0641702 27265 -155 0.0450746 27505 -156 0.0294136 27416 -157 0.0183811 27028 -158 0.0109285 26260 -159 0.00634296 25451 -160 0.00364513 24472 -161 0.0021051 23427 -162 0.00123693 22403 -163 0.000759531 21393 -164 0.000551727 20485 -165 0.000552256 19660 -166 0.000552303 18862 -167 0.000550927 18094 -168 0.000551098 17378 -169 0.000551093 16691 -170 0.000551885 16050 -171 0.000552282 15420 -172 0.000552591 14878 -173 0.00060109 14357 -174 0.000980446 13768 -175 0.00170301 13241 -176 0.003096 12745 -177 0.00579971 12294 -178 0.010976 11879 -179 0.0205422 11636 -180 0.0374515 11431 -181 0.0649916 11517 -182 0.106008 11966 -183 0.159983 12918 -184 0.221127 14484 -185 0.276503 16696 -186 0.310316 19518 -187 0.311205 22301 -188 0.276769 25047 -189 0.220506 27360 -190 0.159123 29133 -191 0.103761 30440 -192 0.0613797 31087 -193 0.033583 31037 -194 0.0173275 30555 -195 0.00861968 29617 -196 0.00419503 28292 -197 0.00203304 26944 -198 0.00100126 25569 -199 0.000553511 24349 -200 0.000554687 
23257 -201 0.00055586 22204 -202 0.000555419 21176 -203 0.000556032 20316 -204 0.000555974 19509 -205 0.000556859 18746 -206 0.000556996 17978 -207 0.000557102 17288 -208 0.000790187 16672 -209 0.00154711 16057 -210 0.00303521 15449 -211 0.00584201 14915 -212 0.0108854 14397 -213 0.0193386 14010 -214 0.0324346 13730 -215 0.0507192 13674 -216 0.0736661 13874 -217 0.0987887 14515 -218 0.122411 15693 -219 0.139964 17265 -220 0.149125 18894 -221 0.151434 20662 -222 0.148067 22442 -223 0.138894 24116 -224 0.125436 25367 -225 0.107664 26360 -226 0.0865709 27044 -227 0.0655588 27428 -228 0.0459664 27714 -229 0.0301384 27687 -230 0.0186481 27262 -231 0.01103 26677 -232 0.00636957 25722 -233 0.00366188 24662 -234 0.00212213 23575 -235 0.00125358 22520 -236 0.000768665 21480 -237 0.000556393 20563 -238 0.000555892 19706 -239 0.00055534 18914 -240 0.000555027 18165 -241 0.000555062 17432 -242 0.000553766 16733 -243 0.000552984 16070 -244 0.000553634 15396 -245 0.000554286 14867 -246 0.000603759 14362 -247 0.000982974 13867 -248 0.00170532 13379 -249 0.00310471 12907 -250 0.00582577 12446 -251 0.0110122 12018 -252 0.0206284 11730 -253 0.0375835 11546 -254 0.0652192 11605 -255 0.10646 11981 -256 0.160858 12949 -257 0.223122 14478 -258 0.279678 16810 -259 0.312171 19452 -260 0.311778 22391 -261 0.276966 25204 -262 0.22251 27379 -263 0.159246 29248 -264 0.104109 30532 -265 0.0617903 30995 -266 0.0338421 31042 -267 0.0174647 30620 -268 0.00867821 29589 -269 0.00419968 28293 -270 0.00203244 26916 -271 0.00100204 25464 -272 0.000555586 24219 -273 0.000555599 23207 -274 0.00055582 22187 -275 0.00055516 21136 -276 0.000555436 20243 -277 0.000555618 19426 -278 0.000556778 18635 -279 0.000556976 17870 -280 0.000557162 17190 -281 0.0007904 16506 -282 0.00154557 15837 -283 0.00302973 15234 -284 0.00584543 14717 -285 0.0108796 14225 -286 0.0192919 13810 -287 0.032329 13605 -288 0.0505293 13536 -289 0.0733417 13760 -290 0.0982413 14378 -291 0.121477 15400 -292 0.138636 17017 -293 0.14875 
18764 -294 0.150515 20516 -295 0.146372 22389 -296 0.137332 23975 -297 0.124076 25120 -298 0.106469 26137 -299 0.0862987 26973 -300 0.0650552 27584 -301 0.0456456 27741 -302 0.0300744 27565 -303 0.0187879 27212 -304 0.0112085 26432 -305 0.00648306 25501 -306 0.00370346 24466 -307 0.00213399 23472 -308 0.00125463 22415 -309 0.000765794 21427 -310 0.000552587 20533 -311 0.000553175 19632 -312 0.000553525 18831 -313 0.000554941 18119 -314 0.000556327 17336 -315 0.000556008 16721 -316 0.00055593 16086 -317 0.000556421 15516 -318 0.000557308 14918 -319 0.00060681 14402 -320 0.000990746 13849 -321 0.00172359 13355 -322 0.00313688 12902 -323 0.0058708 12425 -324 0.0110637 12087 -325 0.0206777 11743 -326 0.0376394 11531 -327 0.0656182 11582 -328 0.107414 12034 -329 0.162101 12955 -330 0.223525 14571 -331 0.279935 16842 -332 0.314601 19566 -333 0.313556 22575 -334 0.279571 25279 -335 0.221638 27642 -336 0.158038 29275 -337 0.102505 30638 -338 0.0608328 31209 -339 0.0335531 31260 -340 0.0173332 30520 -341 0.00861545 29604 -342 0.00419454 28370 -343 0.00202587 26940 -344 0.000994029 25614 -345 0.000549339 24445 -346 0.000551477 23239 -347 0.000552891 22300 -348 0.000551775 21280 -349 0.000552425 20424 -350 0.000552135 19571 -351 0.000552542 18753 -352 0.000552863 18058 -353 0.000554438 17348 -354 0.000786735 16671 -355 0.00153958 16047 -356 0.00301482 15500 -357 0.00580589 14883 -358 0.0108227 14347 -359 0.0192357 13947 -360 0.0321613 13672 -361 0.050229 13606 -362 0.0729462 13815 -363 0.0978564 14566 -364 0.120879 15674 -365 0.137663 17049 -366 0.147092 18813 -367 0.150184 20578 -368 0.146971 22245 -369 0.136769 23723 -370 0.12367 24905 -371 0.106187 25871 -372 0.0860921 26687 -373 0.0645899 27375 -374 0.0453473 27635 -375 0.0298122 27551 -376 0.0185448 27134 -377 0.0110517 26468 -378 0.00640294 25661 -379 0.00367011 24653 -380 0.00211832 23556 -381 0.00125246 22513 -382 0.00076891 21568 -383 0.000557384 20672 -384 0.000557295 19811 -385 0.000556837 18982 -386 0.000557433 
18179 -387 0.000557376 17457 -388 0.000557751 16720 -389 0.000556844 16112 -390 0.000555603 15479 -391 0.000554871 14809 -392 0.00060335 14275 -393 0.000982808 13757 -394 0.00170757 13221 -395 0.00310351 12758 -396 0.0058181 12286 -397 0.010991 11906 -398 0.0205342 11557 -399 0.0373486 11393 -400 0.0647659 11487 -401 0.105589 11887 -402 0.15967 12798 -403 0.220945 14260 -404 0.277122 16477 -405 0.310108 19295 -406 0.308854 22110 -407 0.274911 24915 -408 0.218618 27273 -409 0.156618 29189 -410 0.101775 30572 -411 0.0607503 31174 -412 0.0334708 31316 -413 0.0173443 30731 -414 0.00865633 29636 -415 0.00421141 28342 -416 0.00204387 26991 -417 0.00100602 25595 -418 0.000555131 24336 -419 0.000555037 23251 -420 0.000555559 22267 -421 0.000554916 21212 -422 0.000554432 20306 -423 0.000554751 19488 -424 0.00055638 18727 -425 0.000556727 17927 -426 0.000556368 17198 -427 0.000788004 16578 -428 0.00154404 15944 -429 0.00302383 15315 -430 0.00582586 14786 -431 0.0108457 14290 -432 0.0192962 13815 -433 0.0323072 13561 -434 0.0505101 13456 -435 0.0732162 13811 -436 0.0978737 14403 -437 0.121405 15460 -438 0.138202 16993 -439 0.1482 18710 -440 0.149707 20578 -441 0.146945 22256 -442 0.137785 23713 -443 0.123767 25058 -444 0.105989 26087 -445 0.085483 26759 -446 0.0646144 27375 -447 0.0454389 27680 -448 0.0299337 27531 -449 0.018663 27041 -450 0.0111347 26416 -451 0.00644197 25614 -452 0.00369229 24666 -453 0.00211986 23647 -454 0.00124761 22650 -455 0.000769104 21642 -456 0.000558796 20693 -457 0.000559908 19746 -458 0.000559562 18952 -459 0.00056042 18100 -460 0.000559447 17401 -461 0.000557893 16756 -462 0.000557137 16148 -463 0.000557269 15504 -464 0.000557596 14974 -465 0.000606298 14408 -466 0.000987712 13909 -467 0.00171257 13402 -468 0.00311667 12891 -469 0.00584794 12433 -470 0.0110774 11980 -471 0.0207006 11713 -472 0.037673 11583 -473 0.0654988 11677 -474 0.106982 12072 -475 0.161926 12898 -476 0.224327 14548 -477 0.281709 16796 -478 0.314567 19512 -479 0.313419 22428 
-480 0.278962 25186 -481 0.221864 27755 -482 0.158559 29556 -483 0.103532 30572 -484 0.0611592 31162 -485 0.0337539 31197 -486 0.0175096 30619 -487 0.00865906 29606 -488 0.00420125 28271 -489 0.00203207 26856 -490 0.00100238 25542 -491 0.000554405 24306 -492 0.00055373 23160 -493 0.0005552 22152 -494 0.000553776 21192 -495 0.000553636 20302 -496 0.000553165 19505 -497 0.000554014 18719 -498 0.00055519 17993 -499 0.000556582 17233 -500 0.000788165 16569 -501 0.00154132 15953 -502 0.00302099 15350 -503 0.00581186 14752 -504 0.0108291 14267 -505 0.0192368 13946 -506 0.0322191 13677 -507 0.0503789 13594 -508 0.0730706 13768 -509 0.0980646 14416 -510 0.121601 15634 -511 0.139046 17110 -512 0.147779 18876 -513 0.149612 20734 -514 0.145796 22414 -515 0.136936 23884 -516 0.123807 25078 -517 0.106212 26066 -518 0.0855482 26779 -519 0.0643386 27340 -520 0.0452926 27530 -521 0.0298659 27573 -522 0.0185447 27169 -523 0.0110178 26489 -524 0.00635235 25588 -525 0.00362881 24549 -526 0.00209238 23528 -527 0.00123133 22541 -528 0.000755917 21498 -529 0.000546368 20607 -530 0.000547382 19712 -531 0.000547084 18975 -532 0.000546453 18178 -533 0.000546062 17452 -534 0.000546085 16749 -535 0.000546151 16135 -536 0.000545628 15567 -537 0.000545969 14968 -538 0.000594606 14392 -539 0.000968849 13854 -540 0.00168489 13360 -541 0.00306337 12899 -542 0.00573505 12407 -543 0.0108348 12017 -544 0.02025 11713 -545 0.0368201 11517 -546 0.0639795 11556 -547 0.104882 11941 -548 0.158923 12854 -549 0.219796 14396 -550 0.275801 16733 -551 0.307622 19367 -552 0.30785 22230 -553 0.272898 24873 -554 0.217351 27152 -555 0.156138 29108 -556 0.101477 30379 -557 0.0601091 30971 -558 0.0331551 31126 -559 0.017167 30418 -560 0.00853886 29430 -561 0.00415201 28190 -562 0.00201849 26849 -563 0.000991957 25528 -564 0.000546751 24180 -565 0.00054534 23090 -566 0.000544403 22096 -567 0.00054368 21140 -568 0.000543407 20213 -569 0.000544421 19405 -570 0.000545241 18625 -571 0.000546995 17868 -572 0.000547101 
17102 -573 0.00077428 16423 -574 0.00151348 15783 -575 0.00296212 15220 -576 0.00569555 14602 -577 0.0106307 14154 -578 0.0188783 13743 -579 0.0316572 13538 -580 0.0495211 13467 -581 0.0718936 13665 -582 0.0961304 14240 -583 0.119127 15341 -584 0.136233 16912 -585 0.145327 18567 -586 0.146983 20301 -587 0.143022 21953 -588 0.134931 23439 -589 0.121892 24750 -590 0.103955 25688 -591 0.0833804 26253 -592 0.0625106 26918 -593 0.0440419 27279 -594 0.0290823 27159 -595 0.0180758 26786 -596 0.0107654 26049 -597 0.00622673 25202 -598 0.00356716 24168 -599 0.00205866 23122 -600 0.00121254 22076 -601 0.000745744 21100 -602 0.000537789 20207 -603 0.000537982 19340 -604 0.000537795 18527 -605 0.000537955 17768 -606 0.000539259 17117 -607 0.00053942 16425 -608 0.000540477 15701 -609 0.000540424 15134 -610 0.000540084 14558 -611 0.00058571 14069 -612 0.00095364 13498 -613 0.00165505 13054 -614 0.00300205 12616 -615 0.00561724 12142 -616 0.0106079 11720 -617 0.0198178 11410 -618 0.0360368 11231 -619 0.0623418 11314 -620 0.101856 11688 -621 0.15376 12623 -622 0.213046 14078 -623 0.267285 16225 -624 0.299225 18856 -625 0.299517 21756 -626 0.26697 24652 -627 0.2119 27051 -628 0.151393 28925 -629 0.098869 30065 -630 0.0593653 30570 -631 0.0327177 30483 -632 0.0170081 29735 -633 0.0084493 28844 -634 0.00409333 27665 -635 0.00197466 26356 -636 0.000967996 25009 -637 0.000533137 23839 -638 0.000532992 22721 -639 0.000534258 21676 -640 0.000534251 20709 -641 0.000534556 19798 -642 0.000535287 19008 -643 0.000536214 18278 -644 0.000536647 17547 -645 0.000536556 16901 -646 0.000761043 16256 -647 0.00149108 15621 -648 0.00292808 15032 -649 0.0056527 14504 -650 0.0105421 14010 -651 0.0186823 13646 -652 0.0312164 13356 -653 0.0485643 13404 -654 0.0704061 13612 -655 0.0945219 14230 -656 0.117178 15374 -657 0.134568 16843 -658 0.144475 18492 -659 0.146915 20238 -660 0.14393 21958 -661 0.134621 23537 -662 0.121737 24773 -663 0.104744 25772 -664 0.0846226 26427 -665 0.0639754 27040 -666 
0.0448457 27279 -667 0.029482 27106 -668 0.0183036 26853 -669 0.0108721 26178 -670 0.00627116 25425 -671 0.0035776 24326 -672 0.00206466 23279 -673 0.00122064 22191 -674 0.000751578 21231 -675 0.000542574 20323 -676 0.000540396 19496 -677 0.000538805 18651 -678 0.00053881 17920 -679 0.000537801 17217 -680 0.000537866 16520 -681 0.000538522 15876 -682 0.000538795 15229 -683 0.000539519 14656 -684 0.000587348 14121 -685 0.000955855 13626 -686 0.00165656 13086 -687 0.00301095 12666 -688 0.00564993 12250 -689 0.0106767 11869 -690 0.0199729 11524 -691 0.03641 11331 -692 0.0632378 11402 -693 0.103483 11788 -694 0.156399 12682 -695 0.215591 14337 -696 0.269462 16547 -697 0.303615 19239 -698 0.304506 22023 -699 0.273068 24769 -700 0.21682 27223 -701 0.154934 29029 -702 0.100495 30241 -703 0.0597382 30801 -704 0.0329221 30881 -705 0.0170591 30288 -706 0.00845353 29329 -707 0.00408176 28108 -708 0.00198037 26715 -709 0.000977102 25340 -710 0.000541566 24039 -711 0.000542333 22965 -712 0.000542417 21858 -713 0.000541182 20952 -714 0.00054038 20049 -715 0.000539725 19192 -716 0.000539603 18409 -717 0.000539754 17700 -718 0.000539679 16960 -719 0.000763508 16287 -720 0.00149327 15637 -721 0.00292609 15057 -722 0.00563308 14524 -723 0.0104893 14003 -724 0.0185874 13625 -725 0.0310985 13319 -726 0.0487417 13278 -727 0.0707124 13502 -728 0.0947795 14147 -729 0.117155 15183 -730 0.133995 16622 diff --git a/third_party/rust/flate2/v1/crate/tests/gunzip.rs b/third_party/rust/flate2/v1/crate/tests/gunzip.rs deleted file mode 100644 index c3820328afc5..000000000000 --- a/third_party/rust/flate2/v1/crate/tests/gunzip.rs +++ /dev/null @@ -1,77 +0,0 @@ -extern crate flate2; - -use flate2::read::GzDecoder; -use flate2::read::MultiGzDecoder; -use std::fs::File; -use std::io::prelude::*; -use std::io::{self, BufReader}; -use std::path::Path; - -// test extraction of a gzipped file -#[test] -fn test_extract_success() { - let content = extract_file(Path::new("tests/good-file.gz")).unwrap(); - 
let mut expected = Vec::new(); - File::open("tests/good-file.txt") - .unwrap() - .read_to_end(&mut expected) - .unwrap(); - assert!(content == expected); -} -// -// test partial extraction of a multistream gzipped file -#[test] -fn test_extract_success_partial_multi() { - let content = extract_file(Path::new("tests/multi.gz")).unwrap(); - let mut expected = String::new(); - BufReader::new(File::open("tests/multi.txt").unwrap()) - .read_line(&mut expected) - .unwrap(); - assert_eq!(content, expected.as_bytes()); -} - -// test extraction fails on a corrupt file -#[test] -fn test_extract_failure() { - let result = extract_file(Path::new("tests/corrupt-gz-file.bin")); - assert_eq!(result.err().unwrap().kind(), io::ErrorKind::InvalidInput); -} - -//test complete extraction of a multistream gzipped file -#[test] -fn test_extract_success_multi() { - let content = extract_file_multi(Path::new("tests/multi.gz")).unwrap(); - let mut expected = Vec::new(); - File::open("tests/multi.txt") - .unwrap() - .read_to_end(&mut expected) - .unwrap(); - assert_eq!(content, expected); -} - -// Tries to extract path into memory (assuming a .gz file). -fn extract_file(path_compressed: &Path) -> io::Result> { - let mut v = Vec::new(); - let f = File::open(path_compressed)?; - GzDecoder::new(f).read_to_end(&mut v)?; - Ok(v) -} - -// Tries to extract path into memory (decompressing all members in case -// of a multi member .gz file). 
-fn extract_file_multi(path_compressed: &Path) -> io::Result> { - let mut v = Vec::new(); - let f = File::open(path_compressed)?; - MultiGzDecoder::new(f).read_to_end(&mut v)?; - Ok(v) -} - -#[test] -fn empty_error_once() { - let data: &[u8] = &[]; - let cbjson = GzDecoder::new(data); - let reader = BufReader::new(cbjson); - let mut stream = reader.lines(); - assert!(stream.next().unwrap().is_err()); - assert!(stream.next().is_none()); -} diff --git a/third_party/rust/flate2/v1/crate/tests/multi.gz b/third_party/rust/flate2/v1/crate/tests/multi.gz deleted file mode 100644 index cabc89630fe0..000000000000 Binary files a/third_party/rust/flate2/v1/crate/tests/multi.gz and /dev/null differ diff --git a/third_party/rust/flate2/v1/crate/tests/multi.txt b/third_party/rust/flate2/v1/crate/tests/multi.txt deleted file mode 100644 index 66a52ee7a1d8..000000000000 --- a/third_party/rust/flate2/v1/crate/tests/multi.txt +++ /dev/null @@ -1,2 +0,0 @@ -first -second diff --git a/third_party/rust/flate2/v1/crate/tests/zero-write.rs b/third_party/rust/flate2/v1/crate/tests/zero-write.rs deleted file mode 100644 index f0db86cb8dc1..000000000000 --- a/third_party/rust/flate2/v1/crate/tests/zero-write.rs +++ /dev/null @@ -1,8 +0,0 @@ -extern crate flate2; - -#[test] -fn zero_write_is_error() { - let mut buf = [0u8]; - let writer = flate2::write::DeflateEncoder::new(&mut buf[..], flate2::Compression::default()); - assert!(writer.finish().is_err()); -} diff --git a/third_party/rust/rmp_serde/v0_13/BUILD.gn b/third_party/rust/rmp_serde/v0_13/BUILD.gn deleted file mode 100644 index 09afb1476909..000000000000 --- a/third_party/rust/rmp_serde/v0_13/BUILD.gn +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright 2023 The Chromium Authors -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. 
- -import("//build/rust/cargo_crate.gni") - -cargo_crate("lib") { - crate_name = "rmp_serde" - epoch = "0.13" - crate_type = "rlib" - - # Only for usage from third-party crates. Add the crate to - # third_party.toml to use it from first-party code. - visibility = [ "//brave/third_party/rust/*" ] - crate_root = "crate/src/lib.rs" - - # Unit tests skipped. Generate with --with-tests to include them. - build_native_rust_unit_tests = false - sources = [ "crate/src/lib.rs" ] - edition = "2015" - cargo_pkg_version = "0.13.7" - cargo_pkg_authors = "Evgeny Safronov " - cargo_pkg_name = "rmp-serde" - cargo_pkg_description = "Serde bindings for RMP" - library_configs -= [ "//build/config/compiler:chromium_code" ] - library_configs += [ "//build/config/compiler:no_chromium_code" ] - executable_configs -= [ "//build/config/compiler:chromium_code" ] - executable_configs += [ "//build/config/compiler:no_chromium_code" ] - deps = [ - "//brave/third_party/rust/byteorder/v1:lib", - "//brave/third_party/rust/rmp/v0_8:lib", - "//third_party/rust/serde/v1:lib", - ] -} diff --git a/third_party/rust/rmp_serde/v0_13/README.chromium b/third_party/rust/rmp_serde/v0_13/README.chromium deleted file mode 100644 index 6b9101b4bb0e..000000000000 --- a/third_party/rust/rmp_serde/v0_13/README.chromium +++ /dev/null @@ -1,6 +0,0 @@ -Name: rmp-serde -URL: https://crates.io/crates/rmp-serde -Description: Serde bindings for RMP -Version: 0.13.7 -Security Critical: yes -License: MIT diff --git a/third_party/rust/rmp_serde/v0_13/crate/CHANGELOG.md b/third_party/rust/rmp_serde/v0_13/crate/CHANGELOG.md deleted file mode 100644 index e45a6e8bf28f..000000000000 --- a/third_party/rust/rmp_serde/v0_13/crate/CHANGELOG.md +++ /dev/null @@ -1,138 +0,0 @@ -# Change Log -All notable changes to this project will be documented in this file. -This project adheres to [Semantic Versioning](http://semver.org/). 
- -## [Unreleased][unreleased] -## 0.13.7 - 2017-09-13 -### Changed: -- `Raw` and `RawRef` are now serializable. -- Allow to construct `Raw` and `RawRef` from string or from a byte array. - -## 0.13.6 - 2017-08-04 -### Added: -- Serialize struct as a map (#140). - -## 0.13.5 - 2017-07-21 -### Changed -- Switch to using `char::encode_utf8`. - In Rust 1.15, the function `char::encode_utf8` was stabilized. Assuming that `rmp` follows the `serde` standard of supporting the last 3 stable releases, this function is now safe to use. I believe this removes the last allocation required on the serialization path. - -## 0.13.4 - 2017-07-11 -### Fixed -- Fixed build on nightly rustc (#135). - -## 0.13.3 - 2017-05-27 -### Fixed -- Fixed build on nightly rustc (#131). - -## 0.13.2 - 2017-04-30 -### Changed -- Fixed `rmps::decode::from_read` signature by marking that it can only deserialize into `DeserializeOwned`. The previous signature let try to deserialize, for example `&str` and other borrow types and it failed at runtime instead of catching it at compile time. - -## 0.13.1 - 2017-04-25 -### Added -- Add helper `RawRef` struct that allows to deserialize borrowed strings even if they contain invalid UTF-8. This can be when deserializing frames from older MessagePack spec. - -## 0.13.0 - 2017-04-24 -### Added -- Zero-copy deserialization from `&[u8]`. - -### Changed -- Adapt with serde 1.0. - -## 0.12.4 - 2017-03-26 -### Fixed -- Fix compilation on rustc 1.13. - -## 0.12.3 - 2017-03-26 -### Added -- Add helper `Raw` struct that allows to deserialize strings even if they contain invalid UTF-8. This can be when deserializing frames from older MessagePack spec. -- Serializer can now return back its underlying writer by reference, mutable reference and by value. - -## 0.12.2 - 2017-02-17 -### Added -- Added `write`, `to_vec` and `from_read` functions to reduce boilerplate for serializing and deserializing custom types that implement `Serialize` or `Deserialize`. 
- -## 0.12.1 - 2017-02-11 -### Added -- Allow `Deserializer` to return number of bytes read in case of using Cursor as an underlying reader. - -## 0.12.0 - 2017-02-08 -### Changed -- Adapt with serde 0.9. - -## 0.11.0 - 2017-01-05 -### Changed -- Adapt with RMP core 0.8. -- The `Serializer` now encodes integers using the most effective representation. -- The `Deserializer` now properly decodes integer values that fit in the expected type. -- Default stack protector depth is now 1024 instead of 1000. -- Internal buffer in the `Deserializer` now have some capacity preallocated. - -## 0.10.0 - 2016-10-06 -### Changed -- Update serde dependency to 0.8. - -## 0.9.6 - 2016-08-05 -### Fixed -- Switch unit structs to using the same serialization mechanism as other structs (#76). - -## 0.9.5 - 2016-07-28 -### Added -- Added a wrapper over `rmp::Value` to be able to serialize it. - -## 0.9.4 - 2016-07-11 -### Fixed -- Reading binary should no longer trigger unexpected EOF error on valid read. - -## 0.9.3 - 2016-07-11 -### Changed -- Reuse deserializer buffer on every read for string and binary deserialization without unnecessary intermediate buffer creation. - This change increases the string and binary deserialization performance (many thanks to Fedor Gogolev ). - -## 0.9.2 - 2016-07-03 -### Added -- Implement `size_hint()` function for `SeqVisitor` and `MapVisitor`, so it can be possible to preallocate things, increasing the performance greatly. - -## 0.9.1 - 2016-06-24 -### Fixed -- Serializer should no longer panic with unimplemented error on struct variant serialization ([#64]). - -## 0.9.0 - 2016-03-28 -### Changed -- Adapt code to be compilable with Serde v0.7. - -## 0.8.2 - 2015-11-10 -### Changed -- Fixed stack overflow when unpacking recursive data structures. - -## 0.8.1 - 2015-10-03 -### Changed -- Upper limit for serde version. 
- -### Fixed -- Use the most effective int encoding - Even if the value is explicitly marked as i64 it must be encoded using - the most effective bytes representation despite of signed it or - unsigned. - -## 0.8.0 - 2015-09-11 -### Changed -- Serializer can now be extended with custom struct encoding policy. -- Improved error types and its messages for serialization part. - - New error type introduced - UnknownLength. Returned on attempt to serialize struct, map or serquence with unknown - length (Serde allows this). - - The new type is returned if necessary. - -### Fixed -- Deserializer now properly works with enums. -- Options with default values (that can be initialized using unit marker) deserialization. - This fix also forbids the following Option deserialization cases: - - Option<()>. - - Option>. - It's impossible to properly deserialize the listed cases without explicit option marker in protocol. -- Serializer now properly serializes unit structs. - Previously it was serialized as a unit (nil), now there is just an empty array ([]). - -[#64]: (https://github.com/3Hren/msgpack-rust/pull/64) -[#76]: (https://github.com/3Hren/msgpack-rust/pull/76) diff --git a/third_party/rust/rmp_serde/v0_13/crate/Cargo.toml b/third_party/rust/rmp_serde/v0_13/crate/Cargo.toml deleted file mode 100644 index b0c8db902fec..000000000000 --- a/third_party/rust/rmp_serde/v0_13/crate/Cargo.toml +++ /dev/null @@ -1,36 +0,0 @@ -# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO -# -# When uploading crates to the registry Cargo will automatically -# "normalize" Cargo.toml files for maximal compatibility -# with all versions of Cargo and also rewrite `path` dependencies -# to registry (e.g. crates.io) dependencies -# -# If you believe there's an error in this file please file an -# issue against the rust-lang/cargo repository. 
If you're -# editing this file be aware that the upstream Cargo.toml -# will likely look very different (and much more reasonable) - -[package] -name = "rmp-serde" -version = "0.13.7" -authors = ["Evgeny Safronov "] -description = "Serde bindings for RMP" -documentation = "https://docs.rs/rmp-serde" -readme = "../README.md" -keywords = ["msgpack", "MessagePack", "serde", "serialization"] -categories = ["encoding"] -license = "MIT" -repository = "https://github.com/3Hren/msgpack-rust" -[dependencies.byteorder] -version = "1" - -[dependencies.serde] -version = "1" - -[dependencies.rmp] -version = "0.8" -[dev-dependencies.serde_derive] -version = "1" - -[dev-dependencies.serde_bytes] -version = "0.10" diff --git a/third_party/rust/rmp_serde/v0_13/crate/Cargo.toml.orig b/third_party/rust/rmp_serde/v0_13/crate/Cargo.toml.orig deleted file mode 100644 index 4ed0a4f64164..000000000000 --- a/third_party/rust/rmp_serde/v0_13/crate/Cargo.toml.orig +++ /dev/null @@ -1,20 +0,0 @@ -[package] -name = "rmp-serde" -version = "0.13.7" -authors = ["Evgeny Safronov "] -license = "MIT" -description = "Serde bindings for RMP" -repository = "https://github.com/3Hren/msgpack-rust" -documentation = "https://docs.rs/rmp-serde" -readme = "../README.md" -keywords = ["msgpack", "MessagePack", "serde", "serialization"] -categories = ["encoding"] - -[dependencies] -byteorder = "1" -serde = "1" -rmp = "0.8" - -[dev-dependencies] -serde_bytes = "0.10" -serde_derive = "1" diff --git a/third_party/rust/rmp_serde/v0_13/crate/benches/buf.rs b/third_party/rust/rmp_serde/v0_13/crate/benches/buf.rs deleted file mode 100644 index 4a31499714f0..000000000000 --- a/third_party/rust/rmp_serde/v0_13/crate/benches/buf.rs +++ /dev/null @@ -1,38 +0,0 @@ -#![feature(test)] - -extern crate test; - -extern crate serde; -extern crate rmp_serde; - -use serde::{Serialize, Deserialize}; - -use test::{Bencher}; - -#[bench] -fn bench_strings_1000(bencher: &mut Bencher) { - bench_strings(bencher, 1000) -} - -#[bench] -fn 
bench_strings_5000(bencher: &mut Bencher) { - bench_strings(bencher, 5000) -} - -#[bench] -fn bench_strings_10000(bencher: &mut Bencher) { - bench_strings(bencher, 10000) -} - -fn bench_strings(bencher: &mut Bencher, size: usize) { - let vec: Vec = ::std::iter::repeat("abcdefghijklmnopqrstuvwxyz".into()) - .take(size) - .collect(); - - let mut buf = Vec::new(); - vec.serialize(&mut rmp_serde::Serializer::new(&mut buf)).unwrap(); - - bencher.iter(|| { - >::deserialize(&mut rmp_serde::Deserializer::new(&buf[..])).unwrap(); - }) -} diff --git a/third_party/rust/rmp_serde/v0_13/crate/src/decode.rs b/third_party/rust/rmp_serde/v0_13/crate/src/decode.rs deleted file mode 100644 index 53669a695866..000000000000 --- a/third_party/rust/rmp_serde/v0_13/crate/src/decode.rs +++ /dev/null @@ -1,625 +0,0 @@ -use std::error; -use std::fmt::{self, Display, Formatter}; -use std::io::{self, Cursor}; -use std::str::{self, Utf8Error}; - -use byteorder::{self, ReadBytesExt}; - -use serde; -use serde::de::{self, Deserialize, DeserializeOwned, DeserializeSeed, Visitor}; - -use rmp; -use rmp::Marker; -use rmp::decode::{MarkerReadError, DecodeStringError, ValueReadError, NumValueReadError, - read_array_len}; - -/// Enum representing errors that can occur while decoding MessagePack data. -#[derive(Debug)] -pub enum Error { - /// The enclosed I/O error occured while trying to read a MessagePack - /// marker. - InvalidMarkerRead(io::Error), - /// The enclosed I/O error occured while trying to read the encoded - /// MessagePack data. - InvalidDataRead(io::Error), - /// A mismatch occured between the decoded and expected value types. - TypeMismatch(Marker), - /// A numeric cast failed due to an out-of-range error. - OutOfRange, - /// A decoded array did not have the enclosed expected length. - LengthMismatch(u32), - /// An otherwise uncategorized error occured. See the enclosed `String` for - /// details. 
- Uncategorized(String), - /// A general error occured while deserializing the expected type. See the - /// enclosed `String` for details. - Syntax(String), - /// An encoded string could not be parsed as UTF-8. - Utf8Error(Utf8Error), - /// The depth limit was exceeded; not currently used. - DepthLimitExceeded, -} - -impl error::Error for Error { - fn description(&self) -> &str { - "error while decoding value" - } - - fn cause(&self) -> Option<&error::Error> { - match *self { - Error::TypeMismatch(..) => None, - Error::InvalidMarkerRead(ref err) => Some(err), - Error::InvalidDataRead(ref err) => Some(err), - Error::LengthMismatch(..) => None, - Error::OutOfRange => None, - Error::Uncategorized(..) => None, - Error::Syntax(..) => None, - Error::Utf8Error(ref err) => Some(err), - Error::DepthLimitExceeded => None, - } - } -} - -impl de::Error for Error { - fn custom(msg: T) -> Self { - Error::Syntax(format!("{}", msg)) - } -} - -impl Display for Error { - fn fmt(&self, fmt: &mut Formatter) -> Result<(), fmt::Error> { - error::Error::description(self).fmt(fmt) - } -} - -impl From for Error { - fn from(err: MarkerReadError) -> Error { - Error::InvalidMarkerRead(err.0) - } -} - -impl From for Error { - fn from(err: Utf8Error) -> Error { - Error::Utf8Error(err) - } -} - -impl From for Error { - fn from(err: ValueReadError) -> Error { - match err { - ValueReadError::TypeMismatch(marker) => Error::TypeMismatch(marker), - ValueReadError::InvalidMarkerRead(err) => Error::InvalidMarkerRead(err), - ValueReadError::InvalidDataRead(err) => Error::InvalidDataRead(err), - } - } -} - -impl From for Error { - fn from(err: NumValueReadError) -> Error { - match err { - NumValueReadError::TypeMismatch(marker) => Error::TypeMismatch(marker), - NumValueReadError::InvalidMarkerRead(err) => Error::InvalidMarkerRead(err), - NumValueReadError::InvalidDataRead(err) => Error::InvalidDataRead(err), - NumValueReadError::OutOfRange => Error::OutOfRange, - } - } -} - -impl<'a> From> for Error { - 
fn from(err: DecodeStringError) -> Error { - match err { - DecodeStringError::InvalidMarkerRead(err) => Error::InvalidMarkerRead(err), - DecodeStringError::InvalidDataRead(..) => Error::Uncategorized("InvalidDataRead".to_string()), - DecodeStringError::TypeMismatch(..) => Error::Uncategorized("TypeMismatch".to_string()), - DecodeStringError::BufferSizeTooSmall(..) => Error::Uncategorized("BufferSizeTooSmall".to_string()), - DecodeStringError::InvalidUtf8(..) => Error::Uncategorized("InvalidUtf8".to_string()), - } - } -} - -/// A Deserializer that reads bytes from a buffer. -/// -/// # Note -/// -/// All instances of `ErrorKind::Interrupted` are handled by this function and the underlying -/// operation is retried. -pub struct Deserializer { - rd: R, - marker: Option, - depth: usize, -} - -impl<'de> Deserializer> { - pub fn from_slice(slice: &'de [u8]) -> Self { - Deserializer { - rd: SliceReader::new(slice), - marker: None, - depth: 1024, - } - } - - /// Gets a reference to the underlying reader in this decoder. - pub fn get_ref(&self) -> &[u8] { - self.rd.inner - } -} - -impl Deserializer> { - pub fn from_read(rd: R) -> Self { - Deserializer { - rd: ReadReader::new(rd), - // Cached marker in case of deserializing options. - marker: None, - depth: 1024, - } - } - - /// Constructs a new deserializer by consuming the given reader. - pub fn new(rd: R) -> Self { - Self::from_read(rd) - } - - /// Gets a reference to the underlying reader in this decoder. - pub fn get_ref(&self) -> &R { - &self.rd.inner - } - - /// Gets a mutable reference to the underlying reader in this decoder. - pub fn get_mut(&mut self) -> &mut R { - &mut self.rd.inner - } - - /// Consumes this decoder returning the underlying reader. - pub fn into_inner(self) -> R { - self.rd.inner - } -} - -impl> Deserializer>> { - /// Returns the current position of this deserializer, i.e. how many bytes were read. 
- pub fn position(&self) -> u64 { - self.rd.inner.position() - } -} - -impl<'de, R: Read<'de>> Deserializer { - /// Changes the maximum nesting depth that is allowed - pub fn set_max_depth(&mut self, depth: usize) { - self.depth = depth; - } - - fn read_str_data(&mut self, len: u32, visitor: V) -> Result - where V: Visitor<'de> - { - match self.read_bin_data(len as u32)? { - Reference::Borrowed(buf) => { - match str::from_utf8(buf) { - Ok(s) => visitor.visit_borrowed_str(s), - Err(err) => { - // Allow to unpack invalid UTF-8 bytes into a byte array. - match visitor.visit_borrowed_bytes::(buf) { - Ok(buf) => Ok(buf), - Err(..) => Err(Error::Utf8Error(err)), - } - } - } - } - Reference::Copied(buf) => { - match str::from_utf8(buf) { - Ok(s) => visitor.visit_str(s), - Err(err) => { - // Allow to unpack invalid UTF-8 bytes into a byte array. - match visitor.visit_bytes::(buf) { - Ok(buf) => Ok(buf), - Err(..) => Err(Error::Utf8Error(err)), - } - } - } - } - } - } - - fn read_bin_data<'a>(&'a mut self, len: u32) -> Result, Error> { - self.rd.read_slice(len as usize).map_err(Error::InvalidDataRead) - } - - fn read_array(&mut self, len: u32, visitor: V) -> Result - where V: Visitor<'de> - { - visitor.visit_seq(SeqAccess::new(self, len as usize)) - } - - fn read_map(&mut self, len: u32, visitor: V) -> Result - where V: Visitor<'de> - { - visitor.visit_map(MapAccess::new(self, len as usize)) - } - - fn read_bytes(&mut self, len: u32, visitor: V) -> Result - where V: Visitor<'de> - { - match self.read_bin_data(len)? 
{ - Reference::Borrowed(buf) => visitor.visit_borrowed_bytes(buf), - Reference::Copied(buf) => visitor.visit_bytes(buf), - } - } -} - -fn read_u8<'de, R: Read<'de>>(rd: &mut R) -> Result { - rd.read_u8().map_err(Error::InvalidDataRead) -} - -fn read_u16<'de, R: Read<'de>>(rd: &mut R) -> Result { - rd.read_u16::().map_err(Error::InvalidDataRead) -} - -fn read_u32<'de, R: Read<'de>>(rd: &mut R) -> Result { - rd.read_u32::().map_err(Error::InvalidDataRead) -} - -impl<'de, 'a, R: Read<'de>> serde::Deserializer<'de> for &'a mut Deserializer { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where V: Visitor<'de> - { - let marker = match self.marker.take() { - Some(marker) => marker, - None => rmp::decode::read_marker(&mut self.rd)?, - }; - - match marker { - Marker::Null => visitor.visit_unit(), - Marker::True => visitor.visit_bool(true), - Marker::False => visitor.visit_bool(false), - Marker::FixPos(val) => visitor.visit_u8(val), - Marker::FixNeg(val) => visitor.visit_i8(val), - Marker::U8 => visitor.visit_u8(rmp::decode::read_data_u8(&mut self.rd)?), - Marker::U16 => visitor.visit_u16(rmp::decode::read_data_u16(&mut self.rd)?), - Marker::U32 => visitor.visit_u32(rmp::decode::read_data_u32(&mut self.rd)?), - Marker::U64 => visitor.visit_u64(rmp::decode::read_data_u64(&mut self.rd)?), - Marker::I8 => visitor.visit_i8(rmp::decode::read_data_i8(&mut self.rd)?), - Marker::I16 => visitor.visit_i16(rmp::decode::read_data_i16(&mut self.rd)?), - Marker::I32 => visitor.visit_i32(rmp::decode::read_data_i32(&mut self.rd)?), - Marker::I64 => visitor.visit_i64(rmp::decode::read_data_i64(&mut self.rd)?), - Marker::F32 => visitor.visit_f32(rmp::decode::read_data_f32(&mut self.rd)?), - Marker::F64 => visitor.visit_f64(rmp::decode::read_data_f64(&mut self.rd)?), - Marker::FixStr(len) => { - self.read_str_data(len as u32, visitor) - } - Marker::Str8 => { - let len = read_u8(&mut self.rd)?; - self.read_str_data(len as u32, visitor) - } - Marker::Str16 => { - let 
len = read_u16(&mut self.rd)?; - self.read_str_data(len as u32, visitor) - } - Marker::Str32 => { - let len = read_u32(&mut self.rd)?; - self.read_str_data(len as u32, visitor) - } - Marker::FixArray(len) => { - self.read_array(len as u32, visitor) - } - Marker::Array16 => { - let len = read_u16(&mut self.rd)?; - self.read_array(len as u32, visitor) - } - Marker::Array32 => { - let len = read_u32(&mut self.rd)?; - self.read_array(len, visitor) - } - Marker::FixMap(len) => { - self.read_map(len as u32, visitor) - } - Marker::Map16 => { - let len = read_u16(&mut self.rd)?; - self.read_map(len as u32, visitor) - } - Marker::Map32 => { - let len = read_u32(&mut self.rd)?; - self.read_map(len, visitor) - } - Marker::Bin8 => { - let len = read_u8(&mut self.rd)?; - self.read_bytes(len as u32, visitor) - } - Marker::Bin16 => { - let len = read_u16(&mut self.rd)?; - self.read_bytes(len as u32, visitor) - } - Marker::Bin32 => { - let len = read_u32(&mut self.rd)?; - self.read_bytes(len, visitor) - } - Marker::Reserved => Err(Error::TypeMismatch(Marker::Reserved)), - // TODO: Make something with exts. - marker => Err(Error::TypeMismatch(marker)), - } - } - - fn deserialize_option(self, visitor: V) -> Result - where V: Visitor<'de> - { - let marker = rmp::decode::read_marker(&mut self.rd)?; - - if marker == Marker::Null { - visitor.visit_none() - } else { - self.marker = Some(marker); - visitor.visit_some(self) - } - } - - fn deserialize_enum(self, _name: &str, _variants: &[&str], visitor: V) -> Result - where V: Visitor<'de> - { - match read_array_len(&mut self.rd)? { - 2 => visitor.visit_enum(VariantAccess::new(self)), - n => Err(Error::LengthMismatch(n as u32)), - } - } - - fn deserialize_newtype_struct(self, _name: &'static str, visitor: V) -> Result - where V: Visitor<'de> - { - match read_array_len(&mut self.rd)? { - 1 => visitor.visit_newtype_struct(self), - n => Err(Error::LengthMismatch(n as u32)), - } - } - - forward_to_deserialize_any! 
{ - bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char - str string bytes byte_buf unit unit_struct seq map - tuple_struct struct identifier tuple - ignored_any - } -} - -struct SeqAccess<'a, R: 'a> { - de: &'a mut Deserializer, - left: usize, -} - -impl<'a, R: 'a> SeqAccess<'a, R> { - fn new(de: &'a mut Deserializer, len: usize) -> Self { - SeqAccess { - de: de, - left: len, - } - } -} - -impl<'de, 'a, R: Read<'de> + 'a> de::SeqAccess<'de> for SeqAccess<'a, R> { - type Error = Error; - - fn next_element_seed(&mut self, seed: T) -> Result, Self::Error> - where T: DeserializeSeed<'de> - { - if self.left > 0 { - self.left -= 1; - Ok(Some(seed.deserialize(&mut *self.de)?)) - } else { - Ok(None) - } - } - - fn size_hint(&self) -> Option { - Some(self.left) - } -} - -struct MapAccess<'a, R: 'a> { - de: &'a mut Deserializer, - left: usize, -} - -impl<'a, R: 'a> MapAccess<'a, R> { - fn new(de: &'a mut Deserializer, len: usize) -> Self { - MapAccess { - de: de, - left: len, - } - } -} - -impl<'de, 'a, R: Read<'de> + 'a> de::MapAccess<'de> for MapAccess<'a, R> { - type Error = Error; - - fn next_key_seed(&mut self, seed: K) -> Result, Self::Error> - where K: DeserializeSeed<'de> - { - if self.left > 0 { - self.left -= 1; - Ok(Some(seed.deserialize(&mut *self.de)?)) - } else { - Ok(None) - } - } - - fn next_value_seed(&mut self, seed: V) -> Result - where V: DeserializeSeed<'de> - { - Ok(seed.deserialize(&mut *self.de)?) - } - - fn size_hint(&self) -> Option { - Some(self.left) - } -} - -/// Default variant visitor. -/// -/// # Note -/// -/// We use default behaviour for new type, which decodes enums with a single value as a tuple. 
-pub struct VariantAccess<'a, R: 'a> { - de: &'a mut Deserializer, -} - -impl<'a, R: 'a> VariantAccess<'a, R> { - pub fn new(de: &'a mut Deserializer) -> Self { - VariantAccess { - de: de, - } - } -} - -impl<'de, 'a, R: Read<'de>> de::EnumAccess<'de> for VariantAccess<'a, R> { - type Error = Error; - type Variant = Self; - - fn variant_seed(self, seed: V) -> Result<(V::Value, Self), Error> - where V: de::DeserializeSeed<'de>, - { - use serde::de::IntoDeserializer; - - let idx: u32 = serde::Deserialize::deserialize(&mut *self.de)?; - let val: Result<_, Error> = seed.deserialize(idx.into_deserializer()); - Ok((val?, self)) - } -} - -impl<'de, 'a, R: Read<'de>> de::VariantAccess<'de> for VariantAccess<'a, R> { - type Error = Error; - - fn unit_variant(self) -> Result<(), Error> { - read_array_len(&mut self.de.rd)?; - Ok(()) - } - - fn newtype_variant_seed(self, seed: T) -> Result - where T: DeserializeSeed<'de> - { - read_array_len(&mut self.de.rd)?; - seed.deserialize(self.de) - } - - fn tuple_variant(self, len: usize, visitor: V) -> Result - where V: Visitor<'de> - { - de::Deserializer::deserialize_tuple(self.de, len, visitor) - } - - fn struct_variant(self, fields: &'static [&'static str], visitor: V) -> Result - where V: Visitor<'de> - { - de::Deserializer::deserialize_tuple(self.de, fields.len(), visitor) - } -} - -#[derive(Clone, Copy, Debug, PartialEq)] -pub enum Reference<'b, 'c, T: ?Sized + 'static> { - Borrowed(&'b T), - Copied(&'c T), -} - -pub trait Read<'de>: io::Read { - fn read_slice<'a>(&'a mut self, len: usize) -> Result, io::Error>; -} - -pub struct SliceReader<'a> { - inner: &'a [u8], -} - -impl<'a> SliceReader<'a> { - fn new(slice: &'a [u8]) -> Self { - SliceReader { - inner: slice, - } - } -} - -impl<'de> Read<'de> for SliceReader<'de> { - #[inline] - fn read_slice<'a>(&'a mut self, len: usize) -> Result, io::Error> { - if len > self.inner.len() { - return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "unexpected EOF")) - } - let (a, b) = 
self.inner.split_at(len); - self.inner = b; - Ok(Reference::Borrowed(a)) - } -} - -impl<'a> io::Read for SliceReader<'a> { - #[inline] - fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.inner.read(buf) - } - - #[inline] - fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { - self.inner.read_exact(buf) - } -} - -pub struct ReadReader { - inner: R, - buf: Vec -} - -impl ReadReader { - fn new(rd: R) -> Self { - ReadReader { - inner: rd, - buf: Vec::with_capacity(128), - } - } -} - -impl<'de, R: io::Read> Read<'de> for ReadReader { - #[inline] - fn read_slice<'a>(&'a mut self, len: usize) -> Result, io::Error> { - self.buf.resize(len, 0u8); - - self.inner.read_exact(&mut self.buf[..])?; - - Ok(Reference::Copied(&self.buf[..])) - } -} - -impl io::Read for ReadReader { - #[inline] - fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.inner.read(buf) - } - - #[inline] - fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { - self.inner.read_exact(buf) - } -} - -#[test] -fn test_slice_read() { - let buf = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; - let mut rd = SliceReader::new(&buf[..]); - - assert_eq!(rd.read_slice(1).unwrap(), Reference::Borrowed(&[0][..])); - assert_eq!(rd.read_slice(6).unwrap(), Reference::Borrowed(&[1, 2, 3, 4, 5, 6][..])); - assert!(rd.read_slice(5).is_err()); - assert_eq!(rd.read_slice(4).unwrap(), Reference::Borrowed(&[7, 8, 9, 10][..])); -} - -/// Deserialize an instance of type `T` from an I/O stream of MessagePack. -/// -/// This conversion can fail if the structure of the Value does not match the structure expected -/// by `T`. It can also fail if the structure is correct but `T`'s implementation of `Deserialize` -/// decides that something is wrong with the data, for example required struct fields are missing. -pub fn from_read(rd: R) -> Result - where R: io::Read, - T: DeserializeOwned -{ - Deserialize::deserialize(&mut Deserializer::new(rd)) -} - -/// Deserializes a byte slice into the desired type. 
-pub fn from_slice<'a, T>(input: &'a [u8]) -> Result - where T: serde::Deserialize<'a> -{ - let mut de = Deserializer::from_slice(input); - serde::Deserialize::deserialize(&mut de) -} diff --git a/third_party/rust/rmp_serde/v0_13/crate/src/encode.rs b/third_party/rust/rmp_serde/v0_13/crate/src/encode.rs deleted file mode 100644 index b0e76e59ec0f..000000000000 --- a/third_party/rust/rmp_serde/v0_13/crate/src/encode.rs +++ /dev/null @@ -1,523 +0,0 @@ -use std::error; -use std::fmt::{self, Display}; -use std::io::Write; - -use serde; -use serde::Serialize; -use serde::ser::{SerializeMap, SerializeSeq, SerializeStruct, SerializeStructVariant, - SerializeTuple, SerializeTupleStruct, SerializeTupleVariant}; - -use rmp; -use rmp::Marker; -use rmp::encode::{write_nil, write_bool, write_uint, write_sint, write_f32, write_f64, write_str, - write_array_len, write_map_len, write_bin_len, ValueWriteError}; - -#[derive(Debug)] -pub enum Error { - InvalidValueWrite(ValueWriteError), - - /// Failed to serialize struct, sequence or map, because its length is unknown. - UnknownLength, - - /// Depth limit exceeded - DepthLimitExceeded, - Syntax(String), -} - -impl error::Error for Error { - fn description(&self) -> &str { - match *self { - Error::InvalidValueWrite(..) => "invalid value write", - Error::UnknownLength => { - "attempt to serialize struct, sequence or map with unknown length" - } - Error::DepthLimitExceeded => "depth limit exceeded", - Error::Syntax(..) => "syntax error", - } - } - - fn cause(&self) -> Option<&error::Error> { - match *self { - Error::InvalidValueWrite(ref err) => Some(err), - Error::UnknownLength => None, - Error::DepthLimitExceeded => None, - Error::Syntax(..) 
=> None, - } - } -} - -impl Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - error::Error::description(self).fmt(f) - } -} - -impl From for Error { - fn from(err: ValueWriteError) -> Error { - Error::InvalidValueWrite(err) - } -} - -impl serde::ser::Error for Error { - /// Raised when there is general error when deserializing a type. - fn custom(msg: T) -> Error { - Error::Syntax(format!("{}", msg)) - } -} - -// TODO: Great idea - drop this trait and make Serializer types configurable. -pub trait VariantWriter { - fn write_struct_len(&self, wr: &mut W, len: u32) -> Result - where W: Write; - fn write_field_name(&self, wr: &mut W, key: &str) -> Result<(), ValueWriteError> - where W: Write; -} - -/// Writes struct as MessagePack array with no field names -pub struct StructArrayWriter; - -impl VariantWriter for StructArrayWriter { - fn write_struct_len(&self, wr: &mut W, len: u32) -> Result - where W: Write - { - write_array_len(wr, len) - } - - /// This implementation does not write field names - #[allow(unused_variables)] - fn write_field_name(&self, wr: &mut W, _key: &str) -> Result<(), ValueWriteError> - where W: Write - { - Ok(()) - } -} - -pub struct StructMapWriter; - -impl VariantWriter for StructMapWriter { - fn write_struct_len(&self, wr: &mut W, len: u32) -> Result - where - W: Write, - { - write_map_len(wr, len) - } - - fn write_field_name(&self, wr: &mut W, key: &str) -> Result<(), ValueWriteError> - where - W: Write, - { - write_str(wr, key) - } -} -impl Serializer { - /// Constructs a new `MessagePack` serializer whose output will be written to the writer - /// specified. - /// - /// # Note - /// - /// This is the default constructor, which returns a serializer that will serialize structs - /// using large named representation. - pub fn new_named(wr: W) -> Self { - Serializer::with(wr, StructMapWriter) - } -} -/// Represents MessagePack serialization implementation. 
-/// -/// # Note -/// -/// MessagePack has no specification about how to encode enum types. Thus we are free to do -/// whatever we want, so the given chose may be not ideal for you. -/// -/// Every Rust enum value can be represented as a tuple of index with a value. -/// -/// All instances of `ErrorKind::Interrupted` are handled by this function and the underlying -/// operation is retried. -// TODO: Docs. Examples. -pub struct Serializer { - wr: W, - vw: V, - depth: usize, -} - -impl Serializer { - /// Changes the maximum nesting depth that is allowed - pub fn set_max_depth(&mut self, depth: usize) { - self.depth = depth; - } -} - -impl Serializer { - /// Constructs a new `MessagePack` serializer whose output will be written to the writer - /// specified. - /// - /// # Note - /// - /// This is the default constructor, which returns a serializer that will serialize structs - /// using compact tuple representation, without field names. - pub fn new(wr: W) -> Self { - Serializer::with(wr, StructArrayWriter) - } - pub fn compact(wr: W) -> Self { - Serializer::with(wr, StructArrayWriter) - } -} - -impl Serializer { - /// Gets a reference to the underlying writer. - pub fn get_ref(&self) -> &W { - &self.wr - } - - /// Gets a mutable reference to the underlying writer. - /// - /// It is inadvisable to directly write to the underlying writer. - pub fn get_mut(&mut self) -> &mut W { - &mut self.wr - } - - /// Unwraps this `Serializer`, returning the underlying writer. - pub fn into_inner(self) -> W { - self.wr - } -} - -impl Serializer { - /// Creates a new MessagePack encoder whose output will be written to the writer specified. - pub fn with(wr: W, vw: V) -> Self { - Serializer { - wr: wr, - vw: vw, - depth: 1024, - } - } -} - -pub struct Compound<'a, W: 'a, V: 'a> { - // Note, that the implementation is stateless. 
- se: &'a mut Serializer, -} - -impl<'a, W: Write + 'a, V: VariantWriter + 'a> SerializeSeq for Compound<'a, W, V> { - type Ok = (); - type Error = Error; - - fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> { - value.serialize(&mut *self.se) - } - - fn end(self) -> Result { - Ok(()) - } -} - -impl<'a, W: Write + 'a, V: VariantWriter + 'a> SerializeTuple for Compound<'a, W, V> { - type Ok = (); - type Error = Error; - - fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> { - value.serialize(&mut *self.se) - } - - fn end(self) -> Result { - Ok(()) - } -} - -impl<'a, W: Write + 'a, V: VariantWriter + 'a> SerializeTupleStruct for Compound<'a, W, V> { - type Ok = (); - type Error = Error; - - fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> { - value.serialize(&mut *self.se) - } - - fn end(self) -> Result { - Ok(()) - } -} - -impl<'a, W: Write + 'a, V: VariantWriter + 'a> SerializeTupleVariant for Compound<'a, W, V> { - type Ok = (); - type Error = Error; - - fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> { - value.serialize(&mut *self.se) - } - - fn end(self) -> Result { - Ok(()) - } -} - -impl<'a, W: Write + 'a, V: VariantWriter + 'a> SerializeMap for Compound<'a, W, V> { - type Ok = (); - type Error = Error; - - fn serialize_key(&mut self, key: &T) -> Result<(), Self::Error> { - key.serialize(&mut *self.se) - } - - fn serialize_value(&mut self, value: &T) -> Result<(), Self::Error> { - value.serialize(&mut *self.se) - } - - fn end(self) -> Result { - Ok(()) - } -} - -impl<'a, W: Write + 'a, V: VariantWriter + 'a> SerializeStruct for Compound<'a, W, V> { - type Ok = (); - type Error = Error; - - fn serialize_field(&mut self, key: &'static str, value: &T) -> - Result<(), Self::Error> - { - self.se.vw.write_field_name(&mut self.se.wr, key)?; - value.serialize(&mut *self.se) - } - - fn end(self) -> Result { - Ok(()) - } -} - -impl<'a, W: Write + 'a, V: VariantWriter + 'a> 
SerializeStructVariant for Compound<'a, W, V> { - type Ok = (); - type Error = Error; - - fn serialize_field(&mut self, _key: &'static str, value: &T) -> - Result<(), Self::Error> - { - value.serialize(&mut *self.se) - } - - fn end(self) -> Result { - Ok(()) - } -} - -impl<'a, W: Write, V: VariantWriter> serde::Serializer for &'a mut Serializer { - type Ok = (); - type Error = Error; - - type SerializeSeq = Compound<'a, W, V>; - type SerializeTuple = Compound<'a, W, V>; - type SerializeTupleStruct = Compound<'a, W, V>; - type SerializeTupleVariant = Compound<'a, W, V>; - type SerializeMap = Compound<'a, W, V>; - type SerializeStruct = Compound<'a, W, V>; - type SerializeStructVariant = Compound<'a, W, V>; - - fn serialize_bool(self, v: bool) -> Result { - write_bool(&mut self.wr, v) - .map_err(|err| Error::InvalidValueWrite(ValueWriteError::InvalidMarkerWrite(err))) - } - - fn serialize_i8(self, v: i8) -> Result { - self.serialize_i64(v as i64) - } - - fn serialize_i16(self, v: i16) -> Result { - self.serialize_i64(v as i64) - } - - fn serialize_i32(self, v: i32) -> Result { - self.serialize_i64(v as i64) - } - - fn serialize_i64(self, v: i64) -> Result { - write_sint(&mut self.wr, v)?; - Ok(()) - } - - fn serialize_u8(self, v: u8) -> Result { - self.serialize_u64(v as u64) - } - - fn serialize_u16(self, v: u16) -> Result { - self.serialize_u64(v as u64) - } - - fn serialize_u32(self, v: u32) -> Result { - self.serialize_u64(v as u64) - } - - fn serialize_u64(self, v: u64) -> Result { - write_uint(&mut self.wr, v)?; - Ok(()) - } - - fn serialize_f32(self, v: f32) -> Result { - write_f32(&mut self.wr, v)?; - Ok(()) - } - - fn serialize_f64(self, v: f64) -> Result { - write_f64(&mut self.wr, v)?; - Ok(()) - } - - fn serialize_char(self, v: char) -> Result { - // A char encoded as UTF-8 takes 4 bytes at most. 
- let mut buf = [0; 4]; - self.serialize_str(v.encode_utf8(&mut buf)) - } - - fn serialize_str(self, v: &str) -> Result { - write_str(&mut self.wr, v)?; - Ok(()) - } - - fn serialize_bytes(self, value: &[u8]) -> Result { - write_bin_len(&mut self.wr, value.len() as u32)?; - self.wr - .write_all(value) - .map_err(|err| Error::InvalidValueWrite(ValueWriteError::InvalidDataWrite(err))) - } - - fn serialize_none(self) -> Result<(), Self::Error> { - self.serialize_unit() - } - - fn serialize_some(self, v: &T) -> Result<(), Self::Error> { - v.serialize(self) - } - - fn serialize_unit(self) -> Result { - write_nil(&mut self.wr) - .map_err(|err| Error::InvalidValueWrite(ValueWriteError::InvalidMarkerWrite(err))) - } - - fn serialize_unit_struct(self, _name: &'static str) -> Result { - self.vw.write_struct_len(&mut self.wr, 0)?; - Ok(()) - } - - fn serialize_unit_variant(self, _name: &str, idx: u32, _variant: &str) -> - Result - { - write_array_len(&mut self.wr, 2)?; - self.serialize_u32(idx)?; - write_array_len(&mut self.wr, 0)?; - Ok(()) - } - - fn serialize_newtype_struct(self, name: &'static str, value: &T) -> Result<(), Self::Error> { - self.serialize_tuple_struct(name, 1)?; - value.serialize(self) - } - - fn serialize_newtype_variant(self, name: &'static str, variant_index: u32, variant: &'static str, value: &T) -> Result { - self.serialize_tuple_variant(name, variant_index, variant, 1)?; - value.serialize(self) - } - - fn serialize_seq(self, len: Option) -> Result { - let len = match len { - Some(len) => len, - None => return Err(Error::UnknownLength), - }; - - write_array_len(&mut self.wr, len as u32)?; - - Ok(Compound { se: self }) - } - - fn serialize_tuple(self, len: usize) -> Result { - self.serialize_seq(Some(len)) - } - - fn serialize_tuple_struct(self, _name: &'static str, len: usize) -> - Result - { - self.serialize_tuple(len) - } - - fn serialize_tuple_variant(self, name: &'static str, idx: u32, _variant: &'static str, len: usize) -> - Result - { - // We 
encode variant types as a tuple of id with array of args, like: [id, [args...]]. - rmp::encode::write_array_len(&mut self.wr, 2)?; - self.serialize_u32(idx)?; - self.serialize_tuple_struct(name, len) - } - - fn serialize_map(self, len: Option) -> Result { - match len { - Some(len) => { - write_map_len(&mut self.wr, len as u32)?; - Ok(Compound { se: self }) - } - None => Err(Error::UnknownLength), - } - } - - fn serialize_struct(self, _name: &'static str, len: usize) -> - Result - { - self.vw.write_struct_len(&mut self.wr, len as u32)?; - Ok(Compound { se: self }) - } - - fn serialize_struct_variant(self, name: &'static str, id: u32, _variant: &'static str, len: usize) -> - Result - { - write_array_len(&mut self.wr, 2)?; - self.serialize_u32(id)?; - self.serialize_struct(name, len) - } -} - -/// Serialize the given data structure as MessagePack into the I/O stream. -/// This fyunction uses compact representation - structures as arrays -/// -/// Serialization can fail if `T`'s implementation of `Serialize` decides to fail. -#[inline] -pub fn write(wr: &mut W, val: &T) -> Result<(), Error> - where W: Write, - T: Serialize -{ - val.serialize(&mut Serializer::compact(wr)) -} - -/// Serialize the given data structure as MessagePack into the I/O stream. -/// This function serializes structures as maps -/// -/// Serialization can fail if `T`'s implementation of `Serialize` decides to fail. -#[inline] -pub fn write_named(wr: &mut W, val: &T) -> Result<(), Error> -where - W: Write, - T: Serialize, -{ - val.serialize(&mut Serializer::new_named(wr)) -} - -/// Serialize the given data structure as a MessagePack byte vector. -/// This method uses compact representation, structs are serialized as arrays -/// -/// Serialization can fail if `T`'s implementation of `Serialize` decides to fail. 
-#[inline] -pub fn to_vec(val: &T) -> Result, Error> - where T: Serialize -{ - let mut buf = Vec::with_capacity(128); - write(&mut buf, val)?; - Ok(buf) -} - -/// Serializes data structure into byte vector as a map -/// Resulting MessagePack message will contain field names -/// -/// Serialization can fail if `T`'s implementation of `Serialize` decides to fail. -#[inline] -pub fn to_vec_named(value: &T) -> Result, Error> -where - T: serde::Serialize, -{ - let mut buf = Vec::with_capacity(64); - value.serialize(&mut Serializer::new_named(&mut buf))?; - Ok(buf) -} - diff --git a/third_party/rust/rmp_serde/v0_13/crate/src/lib.rs b/third_party/rust/rmp_serde/v0_13/crate/src/lib.rs deleted file mode 100644 index d7ad7da3fab9..000000000000 --- a/third_party/rust/rmp_serde/v0_13/crate/src/lib.rs +++ /dev/null @@ -1,341 +0,0 @@ -//! # Type-based Serialization and Deserialization -//! -//! Serde provides a mechanism for low boilerplate serialization & deserialization of values to and -//! from MessagePack via the serialization API. To be able to serialize a piece of data, it must -//! implement the `serde::Serialize` trait. To be able to deserialize a piece of data, it must -//! implement the `serde::Deserialize` trait. Serde provides provides an annotation to -//! automatically generate the code for these traits: `#[derive(Serialize, Deserialize)]`. -//! -//! # Examples -//! -//! Let's try to encode and decode some built-in types. -//! -//! ```rust -//! extern crate serde; -//! extern crate rmp_serde as rmps; -//! -//! use serde::{Deserialize, Serialize}; -//! use rmps::{Deserializer, Serializer}; -//! -//! fn main() { -//! let mut buf = Vec::new(); -//! let val = (42u8, "the Answer"); -//! val.serialize(&mut Serializer::new(&mut buf)).unwrap(); -//! -//! assert_eq!(vec![0x92, 0x2a, 0xaa, 0x74, 0x68, 0x65, 0x20, 0x41, 0x6e, 0x73, 0x77, 0x65, 0x72], buf); -//! -//! let mut de = Deserializer::new(&buf[..]); -//! 
assert_eq!((42, "the Answer".to_owned()), Deserialize::deserialize(&mut de).unwrap()); -//! } -//! ``` -//! -//! No one gonna hurt if we add some reflection magic. -//! -//! ```ignore -//! extern crate serde; -//! #[macro_use] -//! extern crate serde_derive; -//! extern crate rmp_serde as rmps; -//! -//! use std::collections::HashMap; -//! use serde::{Deserialize, Serialize}; -//! use rmps::{Deserializer, Serializer}; -//! -//! #[derive(Debug, PartialEq, Deserialize, Serialize)] -//! struct Human { -//! age: u32, -//! name: String, -//! } -//! -//! fn main() { -//! let mut buf = Vec::new(); -//! let val = Human { -//! age: 42, -//! name: "John".into(), -//! }; -//! -//! val.serialize(&mut Serializer::new(&mut buf)).unwrap(); -//! } -//! ``` - -extern crate rmp; -extern crate byteorder; -#[macro_use] -extern crate serde; - -use std::fmt::{self, Display, Formatter}; -use std::mem; -use std::str::{self, Utf8Error}; - -use serde::Serialize; -use serde::de::{self, Deserialize}; - -pub use decode::{Deserializer, from_slice, from_read}; -pub use encode::{Serializer, to_vec, to_vec_named}; - -pub mod decode; -pub mod encode; - -/// Helper that allows both to encode and decode strings no matter whether they contain valid or -/// invalid UTF-8. -/// -/// Regardless of validity the UTF-8 content this type will always be serialized as a string. -#[derive(Clone, Debug, PartialEq)] -pub struct Raw { - s: Result, Utf8Error)>, -} - -impl Raw { - /// Constructs a new `Raw` from the UTF-8 string. - pub fn new(v: String) -> Self { - Self { s: Ok(v) } - } - - /// Converts a vector of bytes to a `Raw`. - pub fn from_utf8(v: Vec) -> Self { - match String::from_utf8(v) { - Ok(v) => Raw::new(v), - Err(err) => { - let e = err.utf8_error(); - Self { - s: Err((err.into_bytes(), e)) - } - } - } - } - - /// Returns `true` if the raw is valid UTF-8. - pub fn is_str(&self) -> bool { - self.s.is_ok() - } - - /// Returns `true` if the raw contains invalid UTF-8 sequence. 
- pub fn is_err(&self) -> bool { - self.s.is_err() - } - - /// Returns the string reference if the raw is valid UTF-8, or else `None`. - pub fn as_str(&self) -> Option<&str> { - match self.s { - Ok(ref s) => Some(s.as_str()), - Err(..) => None, - } - } - - /// Returns the underlying `Utf8Error` if the raw contains invalid UTF-8 sequence, or - /// else `None`. - pub fn as_err(&self) -> Option<&Utf8Error> { - match self.s { - Ok(..) => None, - Err((_, ref err)) => Some(&err), - } - } - - /// Returns a byte slice of this raw's contents. - pub fn as_bytes(&self) -> &[u8] { - match self.s { - Ok(ref s) => s.as_bytes(), - Err(ref err) => &err.0[..], - } - } - - /// Consumes this object, yielding the string if the raw is valid UTF-8, or else `None`. - pub fn into_str(self) -> Option { - self.s.ok() - } - - /// Converts a `Raw` into a byte vector. - pub fn into_bytes(self) -> Vec { - match self.s { - Ok(s) => s.into_bytes(), - Err(err) => err.0, - } - } -} - -impl Serialize for Raw { - fn serialize(&self, se: S) -> Result - where - S: serde::Serializer - { - let s = match self.s { - Ok(ref s) => s.as_str(), - Err((ref b, ..)) => unsafe { mem::transmute(&b[..]) }, - }; - - se.serialize_str(s) - } -} - -struct RawVisitor; - -impl<'de> de::Visitor<'de> for RawVisitor { - type Value = Raw; - - fn expecting(&self, fmt: &mut Formatter) -> Result<(), fmt::Error> { - "string or bytes".fmt(fmt) - } - - #[inline] - fn visit_string(self, v: String) -> Result { - Ok(Raw { s: Ok(v) }) - } - - #[inline] - fn visit_str(self, v: &str) -> Result - where E: de::Error - { - Ok(Raw { s: Ok(v.into()) }) - } - - #[inline] - fn visit_bytes(self, v: &[u8]) -> Result - where E: de::Error - { - let s = match str::from_utf8(v) { - Ok(s) => Ok(s.into()), - Err(err) => Err((v.into(), err)), - }; - - Ok(Raw { s: s }) - } - - #[inline] - fn visit_byte_buf(self, v: Vec) -> Result - where E: de::Error - { - let s = match String::from_utf8(v) { - Ok(s) => Ok(s), - Err(err) => { - let e = err.utf8_error(); 
- Err((err.into_bytes(), e)) - } - }; - - Ok(Raw { s: s }) - } -} - -impl<'de> Deserialize<'de> for Raw { - #[inline] - fn deserialize(de: D) -> Result - where D: de::Deserializer<'de> - { - de.deserialize_any(RawVisitor) - } -} - -/// Helper that allows both to encode and decode strings no matter whether they contain valid or -/// invalid UTF-8. -/// -/// Regardless of validity the UTF-8 content this type will always be serialized as a string. -#[derive(Clone, Copy, Debug, PartialEq)] -pub struct RawRef<'a> { - s: Result<&'a str, (&'a [u8], Utf8Error)>, -} - -impl<'a> RawRef<'a> { - /// Constructs a new `RawRef` from the UTF-8 string. - pub fn new(v: &'a str) -> Self { - Self { s: Ok(v) } - } - - /// Converts a vector of bytes to a `RawRef`. - pub fn from_utf8(v: &'a [u8]) -> Self { - match str::from_utf8(v) { - Ok(v) => RawRef::new(v), - Err(err) => { - Self { - s: Err((v, err)) - } - } - } - } - - /// Returns `true` if the raw is valid UTF-8. - pub fn is_str(&self) -> bool { - self.s.is_ok() - } - - /// Returns `true` if the raw contains invalid UTF-8 sequence. - pub fn is_err(&self) -> bool { - self.s.is_err() - } - - /// Returns the string reference if the raw is valid UTF-8, or else `None`. - pub fn as_str(&self) -> Option<&str> { - match self.s { - Ok(ref s) => Some(s), - Err(..) => None, - } - } - - /// Returns the underlying `Utf8Error` if the raw contains invalid UTF-8 sequence, or - /// else `None`. - pub fn as_err(&self) -> Option<&Utf8Error> { - match self.s { - Ok(..) => None, - Err((_, ref err)) => Some(&err), - } - } - - /// Returns a byte slice of this raw's contents. 
- pub fn as_bytes(&self) -> &[u8] { - match self.s { - Ok(ref s) => s.as_bytes(), - Err(ref err) => &err.0[..], - } - } -} - -impl<'a> Serialize for RawRef<'a> { - fn serialize(&self, se: S) -> Result - where - S: serde::Serializer - { - let s = match self.s { - Ok(ref s) => s, - Err((ref b, ..)) => unsafe { mem::transmute(b) }, - }; - - se.serialize_str(s) - } -} - -struct RawRefVisitor; - -impl<'de> de::Visitor<'de> for RawRefVisitor { - type Value = RawRef<'de>; - - fn expecting(&self, fmt: &mut Formatter) -> Result<(), fmt::Error> { - "string or bytes".fmt(fmt) - } - - #[inline] - fn visit_borrowed_str(self, v: &'de str) -> Result - where E: de::Error - { - Ok(RawRef { s: Ok(v) }) - } - - #[inline] - fn visit_borrowed_bytes(self, v: &'de [u8]) -> Result - where E: de::Error - { - let s = match str::from_utf8(v) { - Ok(s) => Ok(s), - Err(err) => Err((v, err)), - }; - - Ok(RawRef { s: s }) - } -} - -impl<'de> Deserialize<'de> for RawRef<'de> { - #[inline] - fn deserialize(de: D) -> Result - where D: de::Deserializer<'de> - { - de.deserialize_any(RawRefVisitor) - } -} diff --git a/third_party/rust/rmp_serde/v0_13/crate/tests/decode.rs b/third_party/rust/rmp_serde/v0_13/crate/tests/decode.rs deleted file mode 100644 index 0322c427746b..000000000000 --- a/third_party/rust/rmp_serde/v0_13/crate/tests/decode.rs +++ /dev/null @@ -1,509 +0,0 @@ -extern crate serde; -extern crate serde_bytes; -extern crate rmp; -extern crate rmp_serde as rmps; - -use std::io::Cursor; -use std::fmt::{self, Formatter}; - -use serde::de; -use serde::Deserialize; - -use rmp::Marker; -use rmps::{Deserializer, Raw, RawRef}; -use rmps::decode::{self, Error}; - -#[test] -fn pass_nil() { - let buf = [0xc0]; - let mut de = Deserializer::new(&buf[..]); - assert_eq!((), Deserialize::deserialize(&mut de).unwrap()); -} - -#[test] -fn fail_nil_from_reserved() { - let buf = [0xc1]; - let mut de = Deserializer::new(&buf[..]); - - let res: Result<(), Error> = Deserialize::deserialize(&mut de); - match 
res.err() { - Some(Error::TypeMismatch(Marker::Reserved)) => (), - other => panic!("unexpected result: {:?}", other) - } -} - -#[test] -fn pass_bool() { - let buf = [0xc3, 0xc2]; - let mut de = Deserializer::new(&buf[..]); - - assert_eq!(true, Deserialize::deserialize(&mut de).unwrap()); - assert_eq!(false, Deserialize::deserialize(&mut de).unwrap()); -} - -#[test] -fn fail_bool_from_fixint() { - let buf = [0x00]; - let cur = Cursor::new(&buf[..]); - - let mut deserializer = Deserializer::new(cur); - - let res: Result = Deserialize::deserialize(&mut deserializer); - match res.err().unwrap() { - Error::Syntax(..) => (), - other => panic!("unexpected result: {:?}", other) - } -} - -#[test] -fn pass_u64() { - let buf = [0xcf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - - assert_eq!(18446744073709551615u64, Deserialize::deserialize(&mut de).unwrap()); -} - -#[test] -fn pass_u32() { - let buf = [0xce, 0xff, 0xff, 0xff, 0xff]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - - assert_eq!(4294967295u32, Deserialize::deserialize(&mut de).unwrap()); -} - -#[test] -fn fail_u32_from_u64() { - let buf = [0xcf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - - let res: Result = Deserialize::deserialize(&mut de); - match res.err().unwrap() { - Error::Syntax(..) 
=> (), - other => panic!("unexpected result: {:?}", other) - } -} - -#[test] -fn pass_u16() { - let buf = [0xcd, 0xff, 0xff]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - - assert_eq!(65535u16, Deserialize::deserialize(&mut de).unwrap()); -} - -#[test] -fn pass_u8() { - let buf = [0xcc, 0xff]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - - assert_eq!(255u8, Deserialize::deserialize(&mut de).unwrap()); -} - -#[test] -fn pass_u8_from_64() { - let buf = [0xcf, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - - assert_eq!(42u8, Deserialize::deserialize(&mut de).unwrap()); -} - -#[test] -fn pass_usize() { - let buf = [0xcc, 0xff]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - - assert_eq!(255usize, Deserialize::deserialize(&mut de).unwrap()); -} - -#[test] -fn pass_i64() { - let buf = [0xd3, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - - assert_eq!(9223372036854775807i64, Deserialize::deserialize(&mut de).unwrap()); -} - -#[test] -fn pass_i32() { - let buf = [0xd2, 0x7f, 0xff, 0xff, 0xff]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - - assert_eq!(2147483647i32, Deserialize::deserialize(&mut de).unwrap()); -} - -#[test] -fn pass_i16() { - let buf = [0xd1, 0x7f, 0xff]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - - assert_eq!(32767i16, Deserialize::deserialize(&mut de).unwrap()); -} - -#[test] -fn pass_i8() { - let buf = [0xd0, 0x7f]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - - assert_eq!(127i8, Deserialize::deserialize(&mut de).unwrap()); -} - -#[test] -fn pass_isize() { - let buf = [0xd0, 0x7f]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - - assert_eq!(127isize, 
Deserialize::deserialize(&mut de).unwrap()); -} - -#[test] -fn pass_f32() { - let buf = [0xca, 0x7f, 0x7f, 0xff, 0xff]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - - assert_eq!(3.4028234e38_f32, Deserialize::deserialize(&mut de).unwrap()); -} - -#[test] -fn pass_f64() { - let buf = [0xcb, 0x40, 0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - - assert_eq!(42f64, Deserialize::deserialize(&mut de).unwrap()); -} - -#[test] -fn pass_string() { - let buf = [0xaa, 0x6c, 0x65, 0x20, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - let actual: String = Deserialize::deserialize(&mut de).unwrap(); - - assert_eq!("le message".to_string(), actual); -} - -#[test] -fn pass_tuple() { - let buf = [0x92, 0x2a, 0xce, 0x0, 0x1, 0x88, 0x94]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - let actual: (u32, u32) = Deserialize::deserialize(&mut de).unwrap(); - - assert_eq!((42, 100500), actual); -} - -#[ignore] -#[test] -fn fail_tuple_len_mismatch() { - let buf = [0x92, 0x2a, 0xce, 0x0, 0x1, 0x88, 0x94]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - let actual: Result<(u32,), Error> = Deserialize::deserialize(&mut de); - - match actual.err().unwrap() { - Error::LengthMismatch(1) => (), - other => panic!("unexpected result: {:?}", other) - } -} - -#[test] -fn pass_option_some() { - let buf = [0x1f]; - - let mut de = Deserializer::new(&buf[..]); - let actual: Option = Deserialize::deserialize(&mut de).unwrap(); - assert_eq!(Some(31), actual); -} - -#[test] -fn pass_option_none() { - let buf = [0xc0]; - - let mut de = Deserializer::new(&buf[..]); - let actual: Option = Deserialize::deserialize(&mut de).unwrap(); - assert_eq!(None, actual); -} - -#[test] -fn fail_option_u8_from_reserved() { - let buf = [0xc1]; - let cur = Cursor::new(&buf[..]); - - let 
mut de = Deserializer::new(cur); - let actual: Result, Error> = Deserialize::deserialize(&mut de); - match actual.err() { - Some(Error::TypeMismatch(Marker::Reserved)) => (), - other => panic!("unexpected result: {:?}", other) - } -} - -#[test] -fn pass_vector() { - let buf = [0x92, 0x00, 0xcc, 0x80]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - let actual: Vec = Deserialize::deserialize(&mut de).unwrap(); - assert_eq!(vec![0, 128], actual); -} - -#[test] -fn pass_map() { - use std::collections::HashMap; - - let buf = [ - 0x82, // 2 (size) - 0xa3, 0x69, 0x6e, 0x74, // 'int' - 0xcc, 0x80, // 128 - 0xa3, 0x6b, 0x65, 0x79, // 'key' - 0x2a // 42 - ]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - let actual = Deserialize::deserialize(&mut de).unwrap(); - let mut expected = HashMap::new(); - expected.insert("int".to_string(), 128); - expected.insert("key".to_string(), 42); - - assert_eq!(expected, actual); -} - -// TODO: Merge three of them. 
-#[test] -fn pass_bin8_into_bytebuf() { - use serde_bytes::ByteBuf; - - let buf = [0xc4, 0x02, 0xcc, 0x80]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - let actual: ByteBuf = Deserialize::deserialize(&mut de).unwrap(); - let actual: Vec = actual.into(); - - assert_eq!(vec![0xcc, 0x80], actual); -} - -#[test] -fn pass_bin16_into_bytebuf() { - use serde_bytes::ByteBuf; - - let buf = [0xc5, 0x00, 0x02, 0xcc, 0x80]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - let actual: ByteBuf = Deserialize::deserialize(&mut de).unwrap(); - let actual: Vec = actual.into(); - - assert_eq!(vec![0xcc, 0x80], actual); -} - -#[test] -fn pass_bin32_into_bytebuf() { - use serde_bytes::ByteBuf; - - let buf = [0xc6, 0x00, 0x00, 0x00, 0x02, 0xcc, 0x80]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - let actual: ByteBuf = Deserialize::deserialize(&mut de).unwrap(); - let actual: Vec = actual.into(); - - assert_eq!(vec![0xcc, 0x80], actual); -} - -#[test] -fn pass_bin8_into_bytebuf_regression_growing_buffer() { - use serde_bytes::ByteBuf; - - // Try to deserialize large buf and a small buf - let buf = [0x92, 0xc4, 0x04, 0x71, 0x75, 0x75, 0x78, 0xc4, 0x03, 0x62, 0x61, 0x72]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - let (large, small): (ByteBuf, ByteBuf) = Deserialize::deserialize(&mut de).unwrap(); - let (large, small): (Vec, Vec) = (large.into(), small.into()); - - assert_eq!((b"quux".to_vec(), b"bar".to_vec()), (large, small)); -} - -#[test] -fn test_deserialize_numeric() { - #[derive(Debug, PartialEq)] - enum FloatOrInteger { - Float(f64), - Integer(u64), - } - - impl<'de> de::Deserialize<'de> for FloatOrInteger { - fn deserialize(de: D) -> Result - where D: de::Deserializer<'de> - { - struct FloatOrIntegerVisitor; - - impl<'de> de::Visitor<'de> for FloatOrIntegerVisitor { - type Value = FloatOrInteger; - - fn expecting(&self, fmt: &mut Formatter) -> Result<(), 
fmt::Error> { - write!(fmt, "either a float or an integer") - } - - fn visit_u64(self, value: u64) -> Result { - Ok(FloatOrInteger::Integer(value)) - } - - fn visit_f64(self, value: f64) -> Result { - Ok(FloatOrInteger::Float(value)) - } - } - de.deserialize_any(FloatOrIntegerVisitor) - } - } - - let buf = [203, 64, 36, 102, 102, 102, 102, 102, 102]; // 10.2 - let mut de = Deserializer::new(&buf[..]); - let x: FloatOrInteger = Deserialize::deserialize(&mut de).unwrap(); - assert_eq!(x, FloatOrInteger::Float(10.2)); - - let buf = [36]; // 36 - let mut de = Deserializer::new(&buf[..]); - let x: FloatOrInteger = Deserialize::deserialize(&mut de).unwrap(); - assert_eq!(x, FloatOrInteger::Integer(36)); -} - -#[test] -fn pass_deserializer_get_ref() { - let buf = [0xc0]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - - assert_eq!((), Deserialize::deserialize(&mut de).unwrap()); - assert_eq!(1, de.get_ref().position()); -} - -#[test] -fn pass_deserializer_get_mut() { - let buf = [0xc0]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - - assert_eq!((), Deserialize::deserialize(&mut de).unwrap()); - de.get_mut().set_position(0); - - assert_eq!((), Deserialize::deserialize(&mut de).unwrap()); -} - -#[test] -fn pass_deserializer_into_inner() { - let buf = [0xc0]; - let cur = Cursor::new(&buf[..]); - - let mut de = Deserializer::new(cur); - - assert_eq!((), Deserialize::deserialize(&mut de).unwrap()); - let cur = de.into_inner(); - - assert_eq!(1, cur.position()); -} - -#[test] -fn pass_deserializer_cursor_position() { - let mut de = Deserializer::new(Cursor::new(vec![0xce, 0xff, 0xff, 0xff, 0xff])); - - assert_eq!(4294967295u32, Deserialize::deserialize(&mut de).unwrap()); - assert_eq!(5, de.position()); -} - -#[test] -fn pass_from() { - assert_eq!(2147483647, decode::from_read(&[0xd2, 0x7f, 0xff, 0xff, 0xff][..]).unwrap()); -} - -#[test] -fn pass_raw_valid_utf8() { - let buf = vec![0xa3, 0x6b, 0x65, 0x79]; - let raw: 
Raw = rmps::from_slice(&buf[..]).unwrap(); - - assert!(raw.is_str()); - assert_eq!("key", raw.as_str().unwrap()); - assert_eq!([0x6b, 0x65, 0x79], raw.as_bytes()); -} - -#[test] -fn pass_raw_invalid_utf8() { - // >>> msgpack.dumps(msgpack.dumps([200, []])) - // '\xa4\x92\xcc\xc8\x90' - let buf = vec![0xa4, 0x92, 0xcc, 0xc8, 0x90]; - let raw: Raw = rmps::from_slice(&buf[..]).unwrap(); - - assert!(raw.is_err()); - assert_eq!(0, raw.as_err().unwrap().valid_up_to()); - assert_eq!([0x92, 0xcc, 0xc8, 0x90], raw.as_bytes()); -} - -#[test] -fn pass_raw_ref_valid_utf8() { - let buf = vec![0xa3, 0x6b, 0x65, 0x79]; - let raw: RawRef = rmps::from_slice(&buf[..]).unwrap(); - - assert!(raw.is_str()); - assert_eq!("key", raw.as_str().unwrap()); - assert_eq!([0x6b, 0x65, 0x79], raw.as_bytes()); -} - -#[test] -fn pass_raw_ref_invalid_utf8() { - // >>> msgpack.dumps(msgpack.dumps([200, []])) - // '\xa4\x92\xcc\xc8\x90' - let buf = vec![0xa4, 0x92, 0xcc, 0xc8, 0x90]; - let raw: RawRef = rmps::from_slice(&buf[..]).unwrap(); - - assert!(raw.is_err()); - assert_eq!(0, raw.as_err().unwrap().valid_up_to()); - assert_eq!([0x92, 0xcc, 0xc8, 0x90], raw.as_bytes()); -} - -#[test] -fn fail_str_invalid_utf8() { - let buf = vec![0xa4, 0x92, 0xcc, 0xc8, 0x90]; - let err: Result = rmps::from_slice(&buf[..]); - - assert!(err.is_err()); - match err.err().unwrap() { - decode::Error::Utf8Error(err) => assert_eq!(0, err.valid_up_to()), - // decode::Error::Syntax(err) => {} - err => panic!("unexpected error: {:?}", err), - } -} diff --git a/third_party/rust/rmp_serde/v0_13/crate/tests/decode_derive.rs b/third_party/rust/rmp_serde/v0_13/crate/tests/decode_derive.rs deleted file mode 100644 index 436bd59baa17..000000000000 --- a/third_party/rust/rmp_serde/v0_13/crate/tests/decode_derive.rs +++ /dev/null @@ -1,327 +0,0 @@ -extern crate serde; -#[macro_use] -extern crate serde_derive; -extern crate rmp; -extern crate rmp_serde as rmps; - -use std::io::Cursor; - -use serde::Deserialize; - -use 
rmps::Deserializer; -use rmps::decode::Error; - -#[test] -fn pass_newtype() { - let buf = [0x91, 0x2a]; - let cur = Cursor::new(&buf[..]); - - #[derive(Debug, PartialEq, Deserialize)] - struct Struct(u32); - - let mut de = Deserializer::new(cur); - let actual: Struct = Deserialize::deserialize(&mut de).unwrap(); - - assert_eq!(Struct(42), actual); -} - -#[test] -fn pass_tuple_struct() { - let buf = [0x92, 0x2a, 0xce, 0x0, 0x1, 0x88, 0x94]; - let cur = Cursor::new(&buf[..]); - - #[derive(Debug, PartialEq, Deserialize)] - struct Decoded(u32, u32); - - let mut de = Deserializer::new(cur); - let actual: Decoded = Deserialize::deserialize(&mut de).unwrap(); - - assert_eq!(Decoded(42, 100500), actual); -} - -#[test] -fn pass_struct() { - let buf = [0x92, 0x2a, 0xce, 0x0, 0x1, 0x88, 0x94]; - let cur = Cursor::new(&buf[..]); - - #[derive(Debug, PartialEq, Deserialize)] - struct Decoded { - id: u32, - value: u32 - }; - - let mut de = Deserializer::new(cur); - let actual: Decoded = Deserialize::deserialize(&mut de).unwrap(); - - assert_eq!(Decoded { id: 42, value: 100500 }, actual); -} - -#[test] -fn pass_struct_from_map() { - #[derive(Debug, PartialEq, Deserialize)] - struct Struct { - et: String, - le: u8, - shit: u8, - } - - let buf = [ - 0x83, // 3 (size) - 0xa2, 0x65, 0x74, // "et" - 0xa5, 0x76, 0x6f, 0x69, 0x6c, 0x61, // "voila" - 0xa2, 0x6c, 0x65, // "le" - 0x00, // 0 - 0xa4, 0x73, 0x68, 0x69, 0x74, // "shit" - 0x01, // 1 - ]; - let cur = Cursor::new(&buf[..]); - - // It appears no special behavior is needed for deserializing structs encoded as maps. 
- let mut de = Deserializer::new(cur); - let actual: Struct = Deserialize::deserialize(&mut de).unwrap(); - let expected = Struct { et: "voila".into(), le: 0, shit: 1 }; - - assert_eq!(expected, actual); -} - -#[test] -fn pass_unit_variant() { - // We expect enums to be encoded as [id, [...]] - - let buf = [0x92, 0x01, 0x90]; - let cur = Cursor::new(&buf[..]); - - #[derive(Debug, PartialEq, Deserialize)] - enum Enum { - A, - B, - } - - let mut de = Deserializer::new(cur); - let actual: Enum = Deserialize::deserialize(&mut de).unwrap(); - - assert_eq!(Enum::B, actual); - assert_eq!(3, de.get_ref().position()); -} - -#[test] -fn pass_tuple_enum_with_arg() { - // The encoded byte-array is: [1, [42]]. - let buf = [0x92, 0x01, 0x91, 0x2a]; - let cur = Cursor::new(&buf[..]); - - #[derive(Debug, PartialEq, Deserialize)] - enum Enum { - A, - B(u32), - } - - let mut de = Deserializer::new(cur); - let actual: Enum = Deserialize::deserialize(&mut de).unwrap(); - - assert_eq!(Enum::B(42), actual); - assert_eq!(4, de.get_ref().position()) -} - -#[test] -fn pass_tuple_enum_with_args() { - // The encoded bytearray is: [1, [42, 58]]. - let buf = [0x92, 0x01, 0x92, 0x2a, 0x3a]; - let cur = Cursor::new(&buf[..]); - - #[derive(Debug, PartialEq, Deserialize)] - enum Enum { - A, - B(u32, u32), - } - - let mut de = Deserializer::new(cur); - let actual: Enum = Deserialize::deserialize(&mut de).unwrap(); - - assert_eq!(Enum::B(42, 58), actual); - assert_eq!(5, de.get_ref().position()) -} - -#[test] -fn fail_enum_sequence_mismatch() { - // The encoded bytearray is: [1, 2, 100500]. 
- let buf = [0x93, 0x1, 0x2a, 0xce, 0x0, 0x1, 0x88, 0x94]; - let cur = Cursor::new(&buf[..]); - - #[derive(Debug, PartialEq, Deserialize)] - enum Enum { - A, - B, - } - - let mut de = Deserializer::new(cur); - let actual: Result = Deserialize::deserialize(&mut de); - - match actual.err().unwrap() { - Error::LengthMismatch(3) => (), - other => panic!("unexpected result: {:?}", other) - } -} - -#[test] -fn fail_enum_overflow() { - // The encoded bytearray is: [1, [42]]. - let buf = [0x92, 0x01, 0x2a]; - let cur = Cursor::new(&buf[..]); - - #[derive(Debug, PartialEq, Deserialize)] - // TODO: Rename to Enum: A, B, C, ... - enum Enum { - A, - } - - let mut de = Deserializer::new(cur); - let actual: Result = Deserialize::deserialize(&mut de); - - match actual.err().unwrap() { - Error::Syntax(..) => (), - other => panic!("unexpected result: {:?}", other) - } -} - -#[test] -fn pass_struct_enum_with_arg() { - // The encoded bytearray is: [1, [42]]. - let buf = [0x92, 0x01, 0x91, 0x2a]; - let cur = Cursor::new(&buf[..]); - - #[derive(Debug, PartialEq, Deserialize)] - enum Enum { - A, - B { id: u32 }, - } - - let mut de = Deserializer::new(cur); - let actual: Enum = Deserialize::deserialize(&mut de).unwrap(); - - assert_eq!(Enum::B { id: 42 }, actual); - assert_eq!(4, de.get_ref().position()) -} - -#[test] -fn pass_newtype_variant() { - // The encoded bytearray is: [0, [['le message']]]. 
- let buf = [0x92, 0x0, 0x91, 0x91, 0xaa, 0x6c, 0x65, 0x20, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65]; - let cur = Cursor::new(&buf[..]); - - #[derive(Debug, PartialEq, Deserialize)] - struct Nested(String); - - #[derive(Debug, PartialEq, Deserialize)] - enum Enum { - A(Nested), - B, - } - - let mut de = Deserializer::new(cur); - let actual: Enum = Deserialize::deserialize(&mut de).unwrap(); - - assert_eq!(Enum::A(Nested("le message".into())), actual); - assert_eq!(buf.len() as u64, de.get_ref().position()) -} - -#[cfg(disabled)] // This test doesn't actually compile anymore -#[test] -fn pass_enum_custom_policy() { - use std::io::Read; - use rmp_serde::decode::VariantVisitor; - - // We expect enums to be endoded as id, [...] (without wrapping tuple). - - let buf = [0x01, 0x90]; - let cur = Cursor::new(&buf[..]); - - #[derive(Debug, PartialEq, Deserialize)] - enum Enum { - A, - B, - } - - struct CustomDeserializer { - inner: Deserializer, - } - - impl serde::Deserializer for CustomDeserializer { - type Error = Error; - - fn deserialize(&mut self, visitor: V) -> Result - where V: serde::de::Visitor - { - self.inner.deserialize(visitor) - } - - fn deserialize_enum(&mut self, _enum: &str, _variants: &'static [&'static str], mut visitor: V) - -> Result - where V: serde::de::EnumVisitor - { - visitor.visit(VariantVisitor::new(&mut self.inner)) - } - - forward_to_deserialize! 
{ - bool usize u8 u16 u32 u64 isize i8 i16 i32 i64 f32 f64 char str string unit seq - seq_fixed_size bytes map tuple_struct unit_struct struct struct_field - tuple option newtype_struct ignored_any - } - } - - let mut de = CustomDeserializer { inner: Deserializer::new(cur) }; - let actual: Enum = Deserialize::deserialize(&mut de).unwrap(); - - assert_eq!(Enum::B, actual); - assert_eq!(2, de.inner.get_ref().position()); -} - -#[test] -fn pass_struct_variant() { - #[derive(Debug, PartialEq, Deserialize)] - enum Custom { - First { data: u32 }, - Second { data: u32 }, - } - let out_first = vec![0x92, 0x00, 0x91, 0x2a]; - let out_second = vec![0x92, 0x01, 0x91, 0x2a]; - - for (expected, out) in vec![(Custom::First{ data: 42 }, out_first), (Custom::Second { data: 42 }, out_second)] { - let mut de = Deserializer::new(Cursor::new(&out[..])); - let val: Custom = Deserialize::deserialize(&mut de).unwrap(); - assert_eq!(expected, val); - } -} - -#[test] -fn pass_enum_with_one_arg() { - // The encoded bytearray is: [0, [[1, 2]]]. 
- let buf = [0x92, 0x0, 0x91, 0x92, 0x01, 0x02]; - let cur = Cursor::new(&buf[..]); - - #[derive(Debug, PartialEq, Deserialize)] - enum Enum { - V1(Vec), - V2, - } - - let mut de = Deserializer::new(cur); - let actual: Enum = Deserialize::deserialize(&mut de).unwrap(); - - assert_eq!(Enum::V1(vec![1, 2]), actual); - assert_eq!(buf.len() as u64, de.get_ref().position()) -} - -#[test] -fn pass_from_slice() { - let buf = [0x93, 0xa4, 0x4a, 0x6f, 0x68, 0x6e, 0xa5, 0x53, 0x6d, 0x69, 0x74, 0x68, 0x2a]; - - #[derive(Debug, PartialEq, Deserialize)] - struct Person<'a> { - name: &'a str, - surname: &'a str, - age: u8, - } - - assert_eq!(Person { name: "John", surname: "Smith", age: 42 }, rmps::from_slice(&buf[..]).unwrap()); -} diff --git a/third_party/rust/rmp_serde/v0_13/crate/tests/encode.rs b/third_party/rust/rmp_serde/v0_13/crate/tests/encode.rs deleted file mode 100644 index a20eb32b2765..000000000000 --- a/third_party/rust/rmp_serde/v0_13/crate/tests/encode.rs +++ /dev/null @@ -1,367 +0,0 @@ -extern crate serde; -extern crate serde_bytes; -extern crate rmp; -extern crate rmp_serde as rmps; - -use std::io::Cursor; - -use serde::Serialize; - -use rmps::{Raw, RawRef, Serializer}; -use rmps::encode::{self, Error}; - -#[test] -fn pass_null() { - let mut buf = [0x00]; - - let val = (); - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - assert_eq!([0xc0], buf); -} - -#[test] -fn fail_null() { - let mut buf = []; - - let val = (); - - match val.serialize(&mut Serializer::new(&mut &mut buf[..])) { - Err(Error::InvalidValueWrite(..)) => (), - other => panic!("unexpected result: {:?}", other) - } -} - -#[test] -fn pass_bool() { - let mut buf = [0x00, 0x00]; - - { - let mut cur = Cursor::new(&mut buf[..]); - - let mut encoder = Serializer::new(&mut cur); - - let val = true; - val.serialize(&mut encoder).ok().unwrap(); - let val = false; - val.serialize(&mut encoder).ok().unwrap(); - } - - assert_eq!([0xc3, 0xc2], buf); -} - -#[test] -fn pass_usize() { - 
let mut buf = [0x00, 0x00]; - - let val = 255usize; - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - assert_eq!([0xcc, 0xff], buf); -} - -#[test] -fn pass_u8() { - let mut buf = [0x00, 0x00]; - - let val = 255u8; - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - assert_eq!([0xcc, 0xff], buf); -} - -#[test] -fn pass_u16() { - let mut buf = [0x00, 0x00, 0x00]; - - let val = 65535u16; - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - assert_eq!([0xcd, 0xff, 0xff], buf); -} - -#[test] -fn pass_u32() { - let mut buf = [0x00, 0x00, 0x00, 0x00, 0x00]; - - let val = 4294967295u32; - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - assert_eq!([0xce, 0xff, 0xff, 0xff, 0xff], buf); -} - -#[test] -fn pass_u64() { - let mut buf = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; - - let val = 18446744073709551615u64; - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - assert_eq!([0xcf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff], buf); -} - -#[test] -fn pass_isize() { - let mut buf = [0x00, 0x00]; - - let val = -128isize; - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - assert_eq!([0xd0, 0x80], buf); -} - -#[test] -fn pass_i8() { - let mut buf = [0x00, 0x00]; - - let val = -128i8; - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - assert_eq!([0xd0, 0x80], buf); -} - -#[test] -fn pass_i16() { - let mut buf = [0x00, 0x00, 0x00]; - - let val = -32768i16; - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - assert_eq!([0xd1, 0x80, 0x00], buf); -} - -#[test] -fn pass_i32() { - let mut buf = [0x00, 0x00, 0x00, 0x00, 0x00]; - - let val = -2147483648i32; - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - assert_eq!([0xd2, 0x80, 0x00, 0x00, 0x00], buf); -} - -#[test] -fn pass_i64() { - let mut buf = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00]; - - let val = -9223372036854775808i64; - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - assert_eq!([0xd3, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], buf); -} - -#[test] -fn pass_i64_most_effective() { - let mut buf = [0x00, 0x00]; - - // This value can be represented using 2 bytes although it's i64. - let val = 128i64; - val.serialize(&mut Serializer::new(&mut &mut buf[..])).unwrap(); - - assert_eq!([0xcc, 0x80], buf); -} - - -#[test] -fn pass_f32() { - let mut buf = [0x00, 0x00, 0x00, 0x00, 0x00]; - - let val = 3.4028234e38_f32; - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - assert_eq!([0xca, 0x7f, 0x7f, 0xff, 0xff], buf); -} - -#[test] -fn pass_f64() { - let mut buf = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; - - let val = 42f64; - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - assert_eq!([0xcb, 0x40, 0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], buf); -} - -#[test] -fn pass_char() { - let mut buf = [0x00, 0x00]; - - let val = '!'; - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - assert_eq!([0xa1, 0x21], buf); -} - - -#[test] -fn pass_string() { - let mut buf = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; - - let val = "le message"; - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - assert_eq!([0xaa, 0x6c, 0x65, 0x20, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65], buf); -} - -#[test] -fn pass_tuple() { - let mut buf = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; - - let val = (42u32, 100500u32); - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - assert_eq!([0x92, 0x2a, 0xce, 0x0, 0x1, 0x88, 0x94], buf); -} - -#[test] -fn pass_option_some() { - let mut buf = [0x00]; - - let val = Some(100u32); - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - assert_eq!([0x64], buf); -} - -#[test] -fn pass_option_none() { - let mut buf = [0x00]; - - let 
val: Option = None; - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - assert_eq!([0xc0], buf); -} - -#[test] -fn pass_seq() { - let mut buf = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; - - let val = vec!["le", "shit"]; - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - assert_eq!([0x92, 0xa2, 0x6c, 0x65, 0xa4, 0x73, 0x68, 0x69, 0x74], buf); -} - -#[test] -fn pass_map() { - use std::collections::BTreeMap; - - let mut buf = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; - - let mut val = BTreeMap::new(); - val.insert(0u8, "le"); - val.insert(1u8, "shit"); - val.serialize(&mut Serializer::new(&mut &mut buf[..])).ok().unwrap(); - - let out = [ - 0x82, // 2 (size) - 0x00, // 0 - 0xa2, 0x6c, 0x65, // "le" - 0x01, // 1 - 0xa4, 0x73, 0x68, 0x69, 0x74, // "shit" - ]; - assert_eq!(out, buf); -} - -#[test] -fn pass_empty_map() { - use std::collections::BTreeMap; - - let mut buf = vec![]; - - let val: BTreeMap = BTreeMap::new(); - val.serialize(&mut Serializer::new(&mut buf)).ok().unwrap(); - - let out = vec![ - 0x80, // (size: 0) - ]; - assert_eq!(out, buf); -} - -#[test] -fn pass_encoding_struct_into_vec() { - let val = (42u8, "the Answer"); - - let mut buf: Vec = Vec::new(); - - val.serialize(&mut Serializer::new(&mut buf)).unwrap(); - - assert_eq!(vec![0x92, 0x2a, 0xaa, 0x74, 0x68, 0x65, 0x20, 0x41, 0x6e, 0x73, 0x77, 0x65, 0x72], buf); -} - -#[test] -fn pass_bin() { - use serde_bytes::Bytes; - - let mut buf = Vec::new(); - let vec = vec![0xcc, 0x80]; - let val = Bytes::from(&vec[..]); - - val.serialize(&mut Serializer::new(&mut buf)).ok().unwrap(); - - assert_eq!(vec![0xc4, 0x02, 0xcc, 0x80], buf); -} - -#[test] -fn pass_to_vec() { - assert_eq!(vec![0xc0], encode::to_vec(&()).unwrap()); - assert_eq!(vec![0xaa, 0x6c, 0x65, 0x20, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65], - encode::to_vec("le message").unwrap()); -} - -#[test] -fn get_mut() { - let mut se = Serializer::new(Vec::new()); - 
true.serialize(&mut se).unwrap(); - - assert_eq!(&vec![0xc3], se.get_ref()); - - se.get_mut().push(42); - assert_eq!(vec![0xc3, 42], se.into_inner()); -} - -#[test] -fn pass_raw_valid_utf8() { - let raw = Raw::new("key".into()); - - let mut buf = Vec::new(); - raw.serialize(&mut Serializer::new(&mut buf)).unwrap(); - - assert_eq!(vec![0xa3, 0x6b, 0x65, 0x79], buf); -} - -#[test] -fn pass_raw_invalid_utf8() { - // >>> msgpack.dumps(msgpack.dumps([200, []])) - // '\xa4\x92\xcc\xc8\x90' - let raw = Raw::from_utf8(vec![0x92, 0xcc, 0xc8, 0x90]); - - let mut buf = Vec::new(); - raw.serialize(&mut Serializer::new(&mut buf)).unwrap(); - - assert_eq!(vec![0xa4, 0x92, 0xcc, 0xc8, 0x90], buf); -} - -#[test] -fn pass_raw_ref_valid_utf8() { - let raw = RawRef::new("key"); - - let mut buf = Vec::new(); - raw.serialize(&mut Serializer::new(&mut buf)).unwrap(); - - assert_eq!(vec![0xa3, 0x6b, 0x65, 0x79], buf); -} - -#[test] -fn pass_raw_ref_invalid_utf8() { - // >>> msgpack.dumps(msgpack.dumps([200, []])) - // '\xa4\x92\xcc\xc8\x90' - let b = &[0x92, 0xcc, 0xc8, 0x90]; - let raw = RawRef::from_utf8(b); - - let mut buf = Vec::new(); - raw.serialize(&mut Serializer::new(&mut buf)).unwrap(); - - assert_eq!(vec![0xa4, 0x92, 0xcc, 0xc8, 0x90], buf); -} diff --git a/third_party/rust/rmp_serde/v0_13/crate/tests/encode_derive.rs b/third_party/rust/rmp_serde/v0_13/crate/tests/encode_derive.rs deleted file mode 100644 index 8e21cdc04d01..000000000000 --- a/third_party/rust/rmp_serde/v0_13/crate/tests/encode_derive.rs +++ /dev/null @@ -1,175 +0,0 @@ -extern crate serde; -#[macro_use] -extern crate serde_derive; -extern crate rmp; -extern crate rmp_serde as rmps; - -use serde::Serialize; -use rmps::Serializer; - -#[test] -fn pass_unit_struct() { - #[derive(Serialize)] - struct Unit; - - let mut buf = Vec::new(); - Unit.serialize(&mut Serializer::new(&mut buf)).unwrap(); - - // Expect: `[]`. 
- assert_eq!(vec![0x90], buf); -} - -#[test] -fn pass_unit_variant() { - #[derive(Serialize)] - enum Enum { - V1, - V2, - } - - let mut buf = Vec::new(); - Enum::V1.serialize(&mut Serializer::new(&mut buf)).unwrap(); - Enum::V2.serialize(&mut Serializer::new(&mut buf)).unwrap(); - - // Expect: [0, []] [1, []]. - assert_eq!(vec![0x92, 0x00, 0x90, 0x92, 0x01, 0x90], buf); -} - -#[test] -fn pass_newtype_struct() { - #[derive(Serialize)] - struct Struct(u64); - - let val = Struct(42); - let mut buf = Vec::new(); - val.serialize(&mut Serializer::new(&mut buf)).unwrap(); - - // Expect: [42]. - assert_eq!(vec![0x91, 0x2a], buf); -} - -#[test] -fn pass_newtype_variant() { - #[derive(Serialize)] - enum Enum { - V1, - V2(u64), - } - - let mut buf = Vec::new(); - Enum::V1.serialize(&mut Serializer::new(&mut buf)).unwrap(); - Enum::V2(42).serialize(&mut Serializer::new(&mut buf)).unwrap(); - - // Expect: [0, []] [1, [42]]. - assert_eq!(vec![0x92, 0x00, 0x90, 0x92, 0x01, 0x91, 0x2a], buf); -} - -#[test] -fn pass_tuple_struct() { - #[derive(Serialize)] - struct Struct(u32, u64); - - let val = Struct(42, 100500); - let mut buf = Vec::new(); - val.serialize(&mut Serializer::new(&mut buf)).unwrap(); - - // Expect: [42, 100500]. - assert_eq!(vec![0x92, 0x2a, 0xce, 0x00, 0x01, 0x88, 0x94], buf); -} - -#[test] -fn pass_tuple_variant() { - #[derive(Serialize)] - enum Enum { - V1, - V2(u32, u64), - } - - let mut buf = Vec::new(); - Enum::V1.serialize(&mut Serializer::new(&mut buf)).unwrap(); - Enum::V2(42, 100500).serialize(&mut Serializer::new(&mut buf)).unwrap(); - - // Expect: [0, []] [2, [42, 100500]]. - assert_eq!(vec![0x92, 0x00, 0x90, 0x92, 0x01, 0x92, 0x2a, 0xce, 0x00, 0x01, 0x88, 0x94], buf); -} - -#[test] -fn pass_struct() { - #[derive(Serialize)] - struct Struct { - f1: u32, - f2: u32, - } - - let val = Struct { - f1: 42, - f2: 100500, - }; - let mut buf = Vec::new(); - val.serialize(&mut Serializer::new(&mut buf)).unwrap(); - - // Expect: [42, 100500]. 
- assert_eq!(vec![0x92, 0x2a, 0xce, 0x0, 0x1, 0x88, 0x94], buf); -} - -#[test] -fn serialize_struct_variant() { - #[derive(Serialize)] - enum Enum { - V1 { - f1: u32, - }, - V2 { - f1: u32, - }, - } - - let mut buf = Vec::new(); - Enum::V1 { f1: 42 }.serialize(&mut Serializer::new(&mut buf)).unwrap(); - Enum::V2 { f1: 43 }.serialize(&mut Serializer::new(&mut buf)).unwrap(); - - // Expect: [0, [42]] [1, [43]]. - assert_eq!(vec![0x92, 0x00, 0x91, 0x2a, 0x92, 0x01, 0x91, 0x2b], buf); -} - -#[test] -fn pass_struct_as_map() { - use std::io::Write; - use rmp::Marker; - use rmp::encode::{ValueWriteError, write_map_len, write_str}; - use rmps::encode::VariantWriter; - - #[derive(Serialize)] - struct Dog { - name: String, - age: u16, - } - - let dog = Dog { - name: "Bobby".into(), - age: 8, - }; - - struct StructMapWriter; - - impl VariantWriter for StructMapWriter { - fn write_struct_len(&self, wr: &mut W, len: u32) -> - Result - { - write_map_len(wr, len) - } - - fn write_field_name(&self, wr: &mut W, key: &str) -> - Result<(), ValueWriteError> - { - write_str(wr, key) - } - } - - let mut se = Serializer::with(Vec::new(), StructMapWriter); - dog.serialize(&mut se).unwrap(); - - // Expect: {"name": "Bobby", "age": 8}. 
- assert_eq!(vec![0x82, 0xa4, 0x6e, 0x61, 0x6d, 0x65, 0xa5, 0x42, 0x6f, 0x62, 0x62, 0x79, 0xa3, 0x61, 0x67, 0x65, 0x08], - se.into_inner()); -} diff --git a/third_party/rust/rmp_serde/v0_13/crate/tests/round.rs b/third_party/rust/rmp_serde/v0_13/crate/tests/round.rs deleted file mode 100644 index a5c805883088..000000000000 --- a/third_party/rust/rmp_serde/v0_13/crate/tests/round.rs +++ /dev/null @@ -1,121 +0,0 @@ -extern crate serde; -#[macro_use] -extern crate serde_derive; -extern crate rmp; -extern crate rmp_serde as rmps; - -use std::borrow::Cow; -use std::io::Cursor; - -use serde::{Deserialize, Serialize}; -use rmps::{Deserializer, Serializer}; - -#[test] -fn round_trip_option() { - #[derive(Debug, PartialEq, Serialize, Deserialize)] - struct Foo { - v: Option>, - } - - let expected = Foo { v: None }; - - let mut buf = Vec::new(); - expected.serialize(&mut Serializer::new(&mut buf)).unwrap(); - - let mut de = Deserializer::new(Cursor::new(&buf[..])); - - assert_eq!(expected, Deserialize::deserialize(&mut de).unwrap()); -} - -#[test] -fn round_trip_cow() { - #[derive(Serialize, Deserialize, Debug, PartialEq)] - struct Foo<'a> { - v: Cow<'a, [u8]>, - } - - let expected = Foo { v : Cow::Borrowed(&[]) }; - - let mut buf = Vec::new(); - expected.serialize(&mut Serializer::new(&mut buf)).unwrap(); - - let mut de = Deserializer::new(Cursor::new(&buf[..])); - - assert_eq!(expected, Deserialize::deserialize(&mut de).unwrap()); -} - -#[test] -fn round_trip_option_cow() { - use std::borrow::Cow; - use std::io::Cursor; - use serde::Serialize; - - #[derive(Serialize, Deserialize, Debug, PartialEq)] - struct Foo<'a> { - v: Option>, - } - - let expected = Foo { v : None }; - - let mut buf = Vec::new(); - expected.serialize(&mut Serializer::new(&mut buf)).unwrap(); - - let mut de = Deserializer::new(Cursor::new(&buf[..])); - - assert_eq!(expected, Deserialize::deserialize(&mut de).unwrap()); -} - -#[test] -fn round_enum_with_nested_struct() { - use serde::Serialize; - - 
#[derive(Serialize, Deserialize, Debug, PartialEq)] - struct Newtype(String); - - #[derive(Serialize, Deserialize, Debug, PartialEq)] - enum Enum { - A(Newtype), - B, - } - - let expected = Enum::A(Newtype("le message".into())); - let mut buf = Vec::new(); - expected.serialize(&mut Serializer::new(&mut buf)).unwrap(); - - let mut de = Deserializer::new(&buf[..]); - - assert_eq!(expected, Deserialize::deserialize(&mut de).unwrap()); -} - -// Checks whether deserialization and serialization can both work with structs as maps -#[test] -fn round_struct_as_map() { - use rmps::to_vec_named; - use rmps::decode::from_slice; - - #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] - struct Dog1 { - name: String, - age: u16, - } - #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] - struct Dog2 { - age: u16, - name: String, - } - - let dog1 = Dog1 { - name: "Frankie".into(), - age: 42, - }; - - let serialized: Vec = to_vec_named(&dog1).unwrap(); - let deserialized: Dog2 = from_slice(&serialized).unwrap(); - - let check = Dog1 { - age: deserialized.age, - name: deserialized.name, - }; - - assert_eq!(dog1, check); -} diff --git a/third_party/rust/third_party.toml b/third_party/rust/third_party.toml index 7e68e500fb96..08a1802603ff 100644 --- a/third_party/rust/third_party.toml +++ b/third_party/rust/third_party.toml @@ -63,7 +63,6 @@ skrifa = "0.4" libc = "0.2.107" # speedreader -flate2 = { version = "1.0.25", features = ["rust_backend"], default-features = false } kuchiki = "0.8.1" lol_html = "0.3.0" # regex = "1" @@ -229,9 +228,9 @@ if (is_mac) { ''' [dependencies.adblock] -version = "0.7.17" +version = "0.8.0" default-features = false -features = ["full-regex-handling", "debug-info", "css-validation"] +features = ["full-regex-handling", "regex-debug-info", "css-validation"] gn-variables-lib = ''' if (is_ios) { features += [