169 lines
5.0 KiB
Rust
169 lines
5.0 KiB
Rust
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
|
|
|
|
use icu_locid::extensions::private::Private;
|
|
use icu_locid::Locale;
|
|
use litemap::LiteMap;
|
|
use std::collections::BTreeSet;
|
|
use writeable::Writeable;
|
|
use zerotrie::ZeroTriePerfectHash;
|
|
use zerotrie::ZeroTrieSimpleAscii;
|
|
use zerovec::VarZeroVec;
|
|
|
|
mod testdata {
|
|
include!("data/data.rs");
|
|
}
|
|
|
|
use testdata::locales_with_aux::{NUM_UNIQUE_BLOBS, STRINGS};
|
|
use testdata::strings_to_litemap;
|
|
|
|
/// Pins the byte sizes of several ways of storing the full `STRINGS` list as
/// one combined lookup structure: a `VarZeroVec<str>`, a `ZeroTrieSimpleAscii`,
/// a `ZeroTriePerfectHash`, and the raw key bytes with pointer-sized slots.
///
/// The hard-coded totals are consistent with an 8-byte `usize`.
#[test]
fn test_combined() {
    let ptr_size = core::mem::size_of::<usize>();
    let num_strings = STRINGS.len();
    // One pointer-sized slot per string / per unique blob.
    let string_ptr_bytes = num_strings * ptr_size;
    let blob_ptr_bytes = NUM_UNIQUE_BLOBS * ptr_size;

    let key_map = strings_to_litemap(STRINGS);

    // --- VarZeroVec<str> ---
    let flat: VarZeroVec<str> = STRINGS.into();
    let flat_len = flat.as_bytes().len();
    // Bytes of the lookup table alone.
    assert_eq!(flat_len, 10223);
    // Lookup table plus one `usize` per string.
    assert_eq!(flat_len + string_ptr_bytes, 18639);

    // --- ZeroTrieSimpleAscii ---
    let ascii_trie = ZeroTrieSimpleAscii::try_from(&key_map).unwrap();
    let ascii_len = ascii_trie.byte_len();
    // Bytes of the trie alone.
    assert_eq!(ascii_len, 5104);
    // Trie plus one `usize` per unique blob.
    assert_eq!(ascii_len + blob_ptr_bytes, 8392);

    // --- ZeroTriePerfectHash ---
    let hash_trie = ZeroTriePerfectHash::try_from(&key_map).unwrap();
    let hash_len = hash_trie.byte_len();
    // Bytes of the trie alone.
    assert_eq!(hash_len, 5157);
    // Trie plus one `usize` per unique blob.
    assert_eq!(hash_len + blob_ptr_bytes, 8445);

    // --- Raw key bytes ---
    let total_str_len: usize = key_map.iter_keys().map(|key| key.len()).sum();
    assert_eq!(total_str_len, 8115);

    // Key bytes plus one `usize` per string.
    assert_eq!(total_str_len + string_ptr_bytes, 16531);

    // Key bytes plus two `usize` arrays (lookup array and value array).
    assert_eq!(total_str_len + 2 * string_ptr_bytes, 24947);

    // Key bytes, one `usize` per string, one `u16` per string,
    // and one `usize` per unique blob.
    assert_eq!(
        total_str_len
            + string_ptr_bytes
            + num_strings * core::mem::size_of::<u16>()
            + blob_ptr_bytes,
        21923
    );
}
|
|
|
|
/// Pins the total byte sizes obtained when `STRINGS` is split into one lookup
/// structure per distinct private-use extension (the "aux key"), with that
/// extension stripped from each locale before it is stored.
///
/// The hard-coded totals are consistent with an 8-byte `usize`.
#[test]
fn test_aux_split() {
    let ptr_size = core::mem::size_of::<usize>();
    let num_strings = STRINGS.len();

    let locales: Vec<Locale> = STRINGS.iter().map(|s| s.parse().unwrap()).collect();

    // Distinct private-use extensions found across all parsed locales.
    let aux_keys: BTreeSet<&Private> = locales.iter().map(|l| &l.extensions.private).collect();
    assert_eq!(aux_keys.len(), 6);

    let mut cumulative_index = 0;
    let mut total_simpleascii_len = 0;
    let mut total_perfecthash_len = 0;
    let mut total_vzv_len = 0;
    let mut unique_locales: BTreeSet<Vec<u8>> = BTreeSet::new();

    for &aux in &aux_keys {
        // Locales carrying this aux key, with the private extension cleared.
        let mut stripped: Vec<Locale> = Vec::new();
        for locale in locales.iter().filter(|l| l.extensions.private == *aux) {
            let mut bare = locale.clone();
            bare.extensions.private = Private::default();
            stripped.push(bare);
        }

        // String form of every stripped locale, shared by the maps below.
        let rendered: Vec<String> = stripped
            .iter()
            .map(|l| l.write_to_string().into_owned())
            .collect();

        // Values are a running index that continues across aux groups.
        let base = cumulative_index;
        let group_map: LiteMap<Vec<u8>, usize> = rendered
            .iter()
            .enumerate()
            .map(|(offset, text)| (text.as_bytes().to_vec(), base + offset))
            .collect();
        cumulative_index += rendered.len();

        let ascii_trie = ZeroTrieSimpleAscii::try_from(&group_map).unwrap();
        total_simpleascii_len += ascii_trie.byte_len();

        let hash_trie = ZeroTriePerfectHash::try_from(&group_map).unwrap();
        total_perfecthash_len += hash_trie.byte_len();

        unique_locales.extend(group_map.iter_keys().cloned());

        let group_vzv: VarZeroVec<str> = rendered.as_slice().into();
        total_vzv_len += group_vzv.as_bytes().len();
    }

    // Every locale was placed in exactly one aux group.
    assert_eq!(cumulative_index, locales.len());

    assert_eq!(total_simpleascii_len, 5098);
    assert_eq!(total_perfecthash_len, 5302);
    assert_eq!(total_vzv_len, 5510);

    let total_unique_locale_str_len: usize = unique_locales.iter().map(|bytes| bytes.len()).sum();
    assert_eq!(total_unique_locale_str_len, 945);

    let string_ptr_bytes = num_strings * ptr_size;
    let blob_ptr_bytes = NUM_UNIQUE_BLOBS * ptr_size;

    // Structure bytes plus a pointer-sized slot per unique blob (tries)
    // or per string (VarZeroVec).
    assert_eq!(total_simpleascii_len + blob_ptr_bytes, 8386);
    assert_eq!(total_perfecthash_len + blob_ptr_bytes, 8590);
    assert_eq!(total_vzv_len + string_ptr_bytes, 13926);

    // Unique locale bytes plus two `usize` arrays (lookup arrays and value arrays).
    assert_eq!(total_unique_locale_str_len + 2 * string_ptr_bytes, 17777);

    // Unique locale bytes, one `usize` per string, one `u16` per string,
    // and one `usize` per unique blob.
    assert_eq!(
        total_unique_locale_str_len
            + string_ptr_bytes
            + num_strings * core::mem::size_of::<u16>()
            + blob_ptr_bytes,
        14753
    );
}
|