typst/vendor/zerotrie/tests/locale_aux_test.rs
2024-10-16 14:18:46 +03:00

169 lines
5.0 KiB
Rust

// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use icu_locid::extensions::private::Private;
use icu_locid::Locale;
use litemap::LiteMap;
use std::collections::BTreeSet;
use writeable::Writeable;
use zerotrie::ZeroTriePerfectHash;
use zerotrie::ZeroTrieSimpleAscii;
use zerovec::VarZeroVec;
// Fixture module for these tests. Its entire body is the textual contents of
// `data/data.rs`; this file pulls `locales_with_aux::{NUM_UNIQUE_BLOBS, STRINGS}`
// and `strings_to_litemap` out of it.
mod testdata {
// `include!` resolves the path relative to the file containing the invocation.
include!("data/data.rs");
}
use testdata::locales_with_aux::{NUM_UNIQUE_BLOBS, STRINGS};
use testdata::strings_to_litemap;
/// Pins the byte footprint of several encodings of the full `STRINGS` list:
/// a `VarZeroVec<str>`, a `ZeroTrieSimpleAscii`, a `ZeroTriePerfectHash`, and
/// the raw key bytes combined with pointer-sized (or `u16`) index arrays.
///
/// NOTE: every "including pointer array" total multiplies by
/// `size_of::<usize>()`, and the expected constants are only mutually
/// consistent when `usize` is 8 bytes wide.
#[test]
fn test_combined() {
    let litemap = strings_to_litemap(STRINGS);

    // Cost of one pointer-sized slot per input string / per unique blob.
    let ptr_size = core::mem::size_of::<usize>();
    let string_ptr_bytes = STRINGS.len() * ptr_size;
    let blob_ptr_bytes = NUM_UNIQUE_BLOBS * ptr_size;

    // VarZeroVec<str>: lookup table alone, then with the pointer array.
    let vzv: VarZeroVec<str> = STRINGS.into();
    let vzv_bytes = vzv.as_bytes().len();
    assert_eq!(vzv_bytes, 10223);
    assert_eq!(vzv_bytes + string_ptr_bytes, 18639);

    // ZeroTrieSimpleAscii: lookup table alone, then with the pointer array.
    let ascii_trie = ZeroTrieSimpleAscii::try_from(&litemap).unwrap();
    let ascii_trie_bytes = ascii_trie.byte_len();
    assert_eq!(ascii_trie_bytes, 5104);
    assert_eq!(ascii_trie_bytes + blob_ptr_bytes, 8392);

    // ZeroTriePerfectHash: lookup table alone, then with the pointer array.
    let hash_trie = ZeroTriePerfectHash::try_from(&litemap).unwrap();
    let hash_trie_bytes = hash_trie.byte_len();
    assert_eq!(hash_trie_bytes, 5157);
    assert_eq!(hash_trie_bytes + blob_ptr_bytes, 8445);

    // Baseline: the summed length of every key in the map.
    let total_str_len: usize = litemap.iter_keys().map(|key| key.len()).sum();
    assert_eq!(total_str_len, 8115);

    // Key bytes plus one pointer-sized slot per string (the lookup table).
    assert_eq!(total_str_len + string_ptr_bytes, 16531);

    // Key bytes plus two pointer-sized slots per string
    // (one for the lookup array, one for the value array).
    assert_eq!(total_str_len + 2 * string_ptr_bytes, 24947);

    // Key bytes, a pointer-sized lookup slot and a u16 slot per string, and a
    // pointer-sized slot per unique blob.
    let u16_index_bytes = STRINGS.len() * core::mem::size_of::<u16>();
    assert_eq!(
        total_str_len + string_ptr_bytes + u16_index_bytes + blob_ptr_bytes,
        21923
    );
}
/// Pins the byte footprint when `STRINGS` is partitioned by its private-use
/// extension: the locales of each partition (with that extension cleared) are
/// encoded separately and the per-partition sizes are summed.
///
/// NOTE: every "including pointer array" total multiplies by
/// `size_of::<usize>()`, and the expected constants are only mutually
/// consistent when `usize` is 8 bytes wide.
#[test]
fn test_aux_split() {
    let locales: Vec<Locale> = STRINGS
        .iter()
        .map(|s| s.parse::<Locale>().unwrap())
        .collect();

    // The distinct private-use extensions; one partition is built per entry.
    let aux_keys: BTreeSet<&Private> = locales
        .iter()
        .map(|loc| &loc.extensions.private)
        .collect();
    assert_eq!(aux_keys.len(), 6);

    // Running value index shared by all partitions; it advances once per
    // locale processed, so it must equal `locales.len()` at the end.
    let mut cumulative_index = 0;
    let mut total_simpleascii_len = 0;
    let mut total_perfecthash_len = 0;
    let mut total_vzv_len = 0;
    let mut unique_locales = BTreeSet::new();

    for &aux in &aux_keys {
        // Locales carrying this extension, with the extension reset to default.
        let stripped: Vec<Locale> = locales
            .iter()
            .filter(|loc| loc.extensions.private == *aux)
            .cloned()
            .map(|mut loc| {
                loc.extensions.private = Private::default();
                loc
            })
            .collect();

        // Map each stripped locale's byte string to the next running index.
        let litemap: LiteMap<Vec<u8>, usize> = stripped
            .iter()
            .map(|loc| {
                let index = cumulative_index;
                cumulative_index += 1;
                (loc.write_to_string().into_owned().into_bytes(), index)
            })
            .collect();

        let ascii_trie = ZeroTrieSimpleAscii::try_from(&litemap).unwrap();
        total_simpleascii_len += ascii_trie.byte_len();

        let hash_trie = ZeroTriePerfectHash::try_from(&litemap).unwrap();
        total_perfecthash_len += hash_trie.byte_len();

        // Collect the stripped locale strings, deduplicated across partitions.
        unique_locales.extend(litemap.iter_keys().cloned());

        let rendered: Vec<String> = stripped
            .iter()
            .map(|loc| loc.write_to_string().into_owned())
            .collect();
        let vzv: VarZeroVec<str> = rendered.as_slice().into();
        total_vzv_len += vzv.as_bytes().len();
    }

    assert_eq!(cumulative_index, locales.len());
    assert_eq!(total_simpleascii_len, 5098);
    assert_eq!(total_perfecthash_len, 5302);
    assert_eq!(total_vzv_len, 5510);

    let total_unique_locale_str_len: usize = unique_locales.iter().map(|key| key.len()).sum();
    assert_eq!(total_unique_locale_str_len, 945);

    // Cost of one pointer-sized slot per input string / per unique blob.
    let ptr_size = core::mem::size_of::<usize>();
    let string_ptr_bytes = STRINGS.len() * ptr_size;
    let blob_ptr_bytes = NUM_UNIQUE_BLOBS * ptr_size;

    // Sizes including the pointer array:
    assert_eq!(total_simpleascii_len + blob_ptr_bytes, 8386);
    assert_eq!(total_perfecthash_len + blob_ptr_bytes, 8590);
    assert_eq!(total_vzv_len + string_ptr_bytes, 13926);

    // 2x for the lookup arrays and value arrays.
    assert_eq!(total_unique_locale_str_len + 2 * string_ptr_bytes, 17777);

    // Unique locale bytes, a pointer-sized lookup slot and a u16 slot per
    // string, and a pointer-sized slot per unique blob.
    let u16_index_bytes = STRINGS.len() * core::mem::size_of::<u16>();
    assert_eq!(
        total_unique_locale_str_len + string_ptr_bytes + u16_index_bytes + blob_ptr_bytes,
        14753
    );
}