Use icu4x for linebreaking algorithm (#1355)
This commit is contained in:
parent
11714609b8
commit
e2bf2327b5
254
Cargo.lock
generated
254
Cargo.lock
generated
@ -338,6 +338,12 @@ dependencies = [
|
|||||||
"roff",
|
"roff",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cobs"
|
||||||
|
version = "0.2.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "codespan-reporting"
|
name = "codespan-reporting"
|
||||||
version = "0.11.1"
|
version = "0.11.1"
|
||||||
@ -803,6 +809,118 @@ dependencies = [
|
|||||||
"cc",
|
"cc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_collections"
|
||||||
|
version = "1.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ef8302d8dfd6044d3ddb3f807a5ef3d7bbca9a574959c6d6e4dc39aa7012d0d5"
|
||||||
|
dependencies = [
|
||||||
|
"displaydoc",
|
||||||
|
"serde",
|
||||||
|
"yoke",
|
||||||
|
"zerofrom",
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_locid"
|
||||||
|
version = "1.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3003f85dccfc0e238ff567693248c59153a46f4e6125ba4020b973cef4d1d335"
|
||||||
|
dependencies = [
|
||||||
|
"displaydoc",
|
||||||
|
"litemap",
|
||||||
|
"tinystr",
|
||||||
|
"writeable",
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_properties"
|
||||||
|
version = "1.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ce0e1aa26851f16c9e04412a5911c86b7f8768dac8f8d4c5f1c568a7e5d7a434"
|
||||||
|
dependencies = [
|
||||||
|
"displaydoc",
|
||||||
|
"icu_collections",
|
||||||
|
"icu_provider",
|
||||||
|
"serde",
|
||||||
|
"tinystr",
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_provider"
|
||||||
|
version = "1.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8dc312a7b6148f7dfe098047ae2494d12d4034f48ade58d4f353000db376e305"
|
||||||
|
dependencies = [
|
||||||
|
"displaydoc",
|
||||||
|
"icu_locid",
|
||||||
|
"icu_provider_macros",
|
||||||
|
"postcard",
|
||||||
|
"serde",
|
||||||
|
"stable_deref_trait",
|
||||||
|
"writeable",
|
||||||
|
"yoke",
|
||||||
|
"zerofrom",
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_provider_adapters"
|
||||||
|
version = "1.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f4ae1e2bd0c41728b77e7c46e9afdec5e2127d1eedacc684724667d50c126bd3"
|
||||||
|
dependencies = [
|
||||||
|
"icu_locid",
|
||||||
|
"icu_provider",
|
||||||
|
"tinystr",
|
||||||
|
"yoke",
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_provider_blob"
|
||||||
|
version = "1.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fd364c9a01f791a4bc04a74cf2a1d01d9f6926a40fd5ae1c28004e1e70d8338b"
|
||||||
|
dependencies = [
|
||||||
|
"icu_provider",
|
||||||
|
"postcard",
|
||||||
|
"serde",
|
||||||
|
"writeable",
|
||||||
|
"yoke",
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_provider_macros"
|
||||||
|
version = "1.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "dd8b728b9421e93eff1d9f8681101b78fa745e0748c95c655c83f337044a7e10"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn 1.0.109",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "icu_segmenter"
|
||||||
|
version = "1.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c3300a7b6bf187be98a57264ad094f11f2e062c2e8263132af010ff522ee5495"
|
||||||
|
dependencies = [
|
||||||
|
"displaydoc",
|
||||||
|
"icu_collections",
|
||||||
|
"icu_locid",
|
||||||
|
"icu_provider",
|
||||||
|
"num-traits",
|
||||||
|
"serde",
|
||||||
|
"utf8_iter",
|
||||||
|
"zerovec",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "idna"
|
name = "idna"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
@ -1063,6 +1181,12 @@ dependencies = [
|
|||||||
"libdeflate-sys",
|
"libdeflate-sys",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libm"
|
||||||
|
version = "0.2.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "linked-hash-map"
|
name = "linked-hash-map"
|
||||||
version = "0.5.6"
|
version = "0.5.6"
|
||||||
@ -1085,6 +1209,12 @@ dependencies = [
|
|||||||
"rand_chacha",
|
"rand_chacha",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "litemap"
|
||||||
|
version = "0.7.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3a04a5b2b6f54acba899926491d0a6c59d98012938ca2ab5befb281c034e8f94"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lock_api"
|
name = "lock_api"
|
||||||
version = "0.4.9"
|
version = "0.4.9"
|
||||||
@ -1227,6 +1357,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
|
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"autocfg",
|
"autocfg",
|
||||||
|
"libm",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -1397,6 +1528,16 @@ dependencies = [
|
|||||||
"miniz_oxide",
|
"miniz_oxide",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "postcard"
|
||||||
|
version = "1.0.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cfa512cd0d087cc9f99ad30a1bf64795b67871edbead083ffc3a4dfafa59aa00"
|
||||||
|
dependencies = [
|
||||||
|
"cobs",
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ppv-lite86"
|
name = "ppv-lite86"
|
||||||
version = "0.2.17"
|
version = "0.2.17"
|
||||||
@ -1911,6 +2052,18 @@ dependencies = [
|
|||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "synstructure"
|
||||||
|
version = "0.12.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn 1.0.109",
|
||||||
|
"unicode-xid",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syntect"
|
name = "syntect"
|
||||||
version = "5.0.0"
|
version = "5.0.0"
|
||||||
@ -2056,6 +2209,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "7ac3f5b6856e931e15e07b478e98c8045239829a65f9156d4fa7e7788197a5ef"
|
checksum = "7ac3f5b6856e931e15e07b478e98c8045239829a65f9156d4fa7e7788197a5ef"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"displaydoc",
|
"displaydoc",
|
||||||
|
"serde",
|
||||||
|
"zerovec",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -2299,6 +2454,11 @@ dependencies = [
|
|||||||
"ecow",
|
"ecow",
|
||||||
"hayagriva",
|
"hayagriva",
|
||||||
"hypher",
|
"hypher",
|
||||||
|
"icu_properties",
|
||||||
|
"icu_provider",
|
||||||
|
"icu_provider_adapters",
|
||||||
|
"icu_provider_blob",
|
||||||
|
"icu_segmenter",
|
||||||
"kurbo",
|
"kurbo",
|
||||||
"lipsum",
|
"lipsum",
|
||||||
"log",
|
"log",
|
||||||
@ -2319,7 +2479,6 @@ dependencies = [
|
|||||||
"unicode-math-class",
|
"unicode-math-class",
|
||||||
"unicode-script",
|
"unicode-script",
|
||||||
"unicode-segmentation",
|
"unicode-segmentation",
|
||||||
"xi-unicode",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -2447,6 +2606,12 @@ version = "0.1.10"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
|
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-xid"
|
||||||
|
version = "0.2.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode_names2"
|
name = "unicode_names2"
|
||||||
version = "0.6.0"
|
version = "0.6.0"
|
||||||
@ -2530,6 +2695,12 @@ dependencies = [
|
|||||||
"svgtypes",
|
"svgtypes",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "utf8_iter"
|
||||||
|
version = "1.0.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "64a8922555b9500e3d865caed19330172cd67cbf82203f1a3311d8c305cc9f33"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "utf8parse"
|
name = "utf8parse"
|
||||||
version = "0.2.1"
|
version = "0.2.1"
|
||||||
@ -2814,6 +2985,12 @@ dependencies = [
|
|||||||
"memchr",
|
"memchr",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "writeable"
|
||||||
|
version = "0.5.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "60e49e42bdb1d5dc76f4cd78102f8f0714d32edfa3efb82286eb0f0b1fc0da0f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wyz"
|
name = "wyz"
|
||||||
version = "0.5.1"
|
version = "0.5.1"
|
||||||
@ -2823,12 +3000,6 @@ dependencies = [
|
|||||||
"tap",
|
"tap",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "xi-unicode"
|
|
||||||
version = "0.3.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "a67300977d3dc3f8034dae89778f502b6ba20b269527b3223ba59c0cf393bb8a"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "xmlparser"
|
name = "xmlparser"
|
||||||
version = "0.13.5"
|
version = "0.13.5"
|
||||||
@ -2866,6 +3037,75 @@ dependencies = [
|
|||||||
"linked-hash-map",
|
"linked-hash-map",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "yoke"
|
||||||
|
version = "0.7.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1848075a23a28f9773498ee9a0f2cf58fcbad4f8c0ccf84a210ab33c6ae495de"
|
||||||
|
dependencies = [
|
||||||
|
"serde",
|
||||||
|
"stable_deref_trait",
|
||||||
|
"yoke-derive",
|
||||||
|
"zerofrom",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "yoke-derive"
|
||||||
|
version = "0.7.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "af46c169923ed7516eef0aa32b56d2651b229f57458ebe46b49ddd6efef5b7a2"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn 1.0.109",
|
||||||
|
"synstructure",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerofrom"
|
||||||
|
version = "0.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "df54d76c3251de27615dfcce21e636c172dafb2549cd7fd93e21c66f6ca6bea2"
|
||||||
|
dependencies = [
|
||||||
|
"zerofrom-derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerofrom-derive"
|
||||||
|
version = "0.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b4eae7c1f7d4b8eafce526bc0771449ddc2f250881ae31c50d22c032b5a1c499"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn 1.0.109",
|
||||||
|
"synstructure",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerovec"
|
||||||
|
version = "0.9.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "198f54134cd865f437820aa3b43d0ad518af4e68ee161b444cdd15d8e567c8ea"
|
||||||
|
dependencies = [
|
||||||
|
"serde",
|
||||||
|
"yoke",
|
||||||
|
"zerofrom",
|
||||||
|
"zerovec-derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zerovec-derive"
|
||||||
|
version = "0.9.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "486558732d5dde10d0f8cb2936507c1bb21bc539d924c949baf5f36a58e51bac"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn 1.0.109",
|
||||||
|
"synstructure",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zopfli"
|
name = "zopfli"
|
||||||
version = "0.7.2"
|
version = "0.7.2"
|
||||||
|
BIN
assets/data/cj_linebreak_data.postcard
Normal file
BIN
assets/data/cj_linebreak_data.postcard
Normal file
Binary file not shown.
BIN
assets/data/icudata.postcard
Normal file
BIN
assets/data/icudata.postcard
Normal file
Binary file not shown.
BIN
assets/fonts/NotoSansThai-Regular.ttf
Normal file
BIN
assets/fonts/NotoSansThai-Regular.ttf
Normal file
Binary file not shown.
@ -25,6 +25,11 @@ csv = "1"
|
|||||||
ecow = "0.1"
|
ecow = "0.1"
|
||||||
hayagriva = "0.3"
|
hayagriva = "0.3"
|
||||||
hypher = "0.1"
|
hypher = "0.1"
|
||||||
|
icu_properties = { version = "1.2.0", features = ["serde"] }
|
||||||
|
icu_provider = { version = "1.2.0", features = ["sync"] }
|
||||||
|
icu_provider_adapters = "1.2.0"
|
||||||
|
icu_provider_blob = "1.2.0"
|
||||||
|
icu_segmenter = { version = "1.2.1", features = ["serde"] }
|
||||||
kurbo = "0.9"
|
kurbo = "0.9"
|
||||||
lipsum = "0.9"
|
lipsum = "0.9"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
@ -44,4 +49,3 @@ unicode-bidi = "0.3.13"
|
|||||||
unicode-math-class = "0.1"
|
unicode-math-class = "0.1"
|
||||||
unicode-script = "0.5"
|
unicode-script = "0.5"
|
||||||
unicode-segmentation = "1"
|
unicode-segmentation = "1"
|
||||||
xi-unicode = "0.3"
|
|
||||||
|
@ -1,7 +1,12 @@
|
|||||||
|
use icu_properties::{maps::CodePointMapData, LineBreak};
|
||||||
|
use icu_provider::AsDeserializingBufferProvider;
|
||||||
|
use icu_provider_adapters::fork::ForkByKeyProvider;
|
||||||
|
use icu_provider_blob::BlobDataProvider;
|
||||||
|
use icu_segmenter::{LineBreakIteratorUtf8, LineSegmenter};
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
use typst::eval::Tracer;
|
use typst::eval::Tracer;
|
||||||
use unicode_bidi::{BidiInfo, Level as BidiLevel};
|
use unicode_bidi::{BidiInfo, Level as BidiLevel};
|
||||||
use unicode_script::{Script, UnicodeScript};
|
use unicode_script::{Script, UnicodeScript};
|
||||||
use xi_unicode::LineBreakIterator;
|
|
||||||
|
|
||||||
use super::{BoxElem, HElem, Sizing, Spacing};
|
use super::{BoxElem, HElem, Sizing, Spacing};
|
||||||
use crate::layout::AlignElem;
|
use crate::layout::AlignElem;
|
||||||
@ -998,15 +1003,65 @@ fn linebreak_optimized<'a>(vt: &Vt, p: &'a Preparation<'a>, width: Abs) -> Vec<L
|
|||||||
lines
|
lines
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Generated by the following command:
|
||||||
|
///
|
||||||
|
/// ```sh
|
||||||
|
/// icu4x-datagen --locales full --keys-for-bin target/debug/typst \
|
||||||
|
/// --format blob --out assets/data/icudata.postcard --overwrite
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Install icu4x-datagen with `cargo install icu4x-datagen`.
|
||||||
|
static ICU_DATA: &[u8] = include_bytes!("../../../assets/data/icudata.postcard");
|
||||||
|
|
||||||
|
/// Generated by the following command:
|
||||||
|
///
|
||||||
|
/// ```sh
|
||||||
|
/// icu4x-datagen --locales zh ja --keys segmenter/line@1 --format blob \
|
||||||
|
/// --out assets/data/cj_linebreak_data.postcard --overwrite
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// The used icu4x-datagen should be patched by
|
||||||
|
/// https://github.com/peng1999/icu4x/commit/b9beb6cbf633d61fc3d7983e5baf7f4449fbfae5
|
||||||
|
static CJ_LINEBREAK_DATA: &[u8] =
|
||||||
|
include_bytes!("../../../assets/data/cj_linebreak_data.postcard");
|
||||||
|
|
||||||
|
/// The general line break segmenter.
|
||||||
|
static SEGMENTER: Lazy<LineSegmenter> = Lazy::new(|| {
|
||||||
|
let provider = BlobDataProvider::try_new_from_static_blob(ICU_DATA).unwrap();
|
||||||
|
LineSegmenter::try_new_lstm_with_buffer_provider(&provider).unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
/// The line break segmenter for Chinese/Japanese text.
|
||||||
|
static CJ_SEGMENTER: Lazy<LineSegmenter> = Lazy::new(|| {
|
||||||
|
let provider = BlobDataProvider::try_new_from_static_blob(ICU_DATA).unwrap();
|
||||||
|
let cj_blob = BlobDataProvider::try_new_from_static_blob(CJ_LINEBREAK_DATA).unwrap();
|
||||||
|
let cj_provider = ForkByKeyProvider::new(cj_blob, provider);
|
||||||
|
LineSegmenter::try_new_lstm_with_buffer_provider(&cj_provider).unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
|
/// The Unicode line break properties for each code point.
|
||||||
|
static LINEBREAK_DATA: Lazy<CodePointMapData<LineBreak>> = Lazy::new(|| {
|
||||||
|
let provider = BlobDataProvider::try_new_from_static_blob(ICU_DATA).unwrap();
|
||||||
|
let deser_provider = provider.as_deserializing();
|
||||||
|
icu_properties::maps::load_line_break(&deser_provider).unwrap()
|
||||||
|
});
|
||||||
|
|
||||||
/// Determine all possible points in the text where lines can be broken.
|
/// Determine all possible points in the text where lines can be broken.
|
||||||
///
|
///
|
||||||
/// Returns for each breakpoint the text index, whether the break is mandatory
|
/// Returns for each breakpoint the text index, whether the break is mandatory
|
||||||
/// (after `\n`) and whether a hyphen is required (when breaking inside of a
|
/// (after `\n`) and whether a hyphen is required (when breaking inside of a
|
||||||
/// word).
|
/// word).
|
||||||
fn breakpoints<'a>(p: &'a Preparation<'a>) -> Breakpoints<'a> {
|
fn breakpoints<'a>(p: &'a Preparation<'a>) -> Breakpoints<'a> {
|
||||||
|
let mut linebreaks = if matches!(p.lang, Some(Lang::CHINESE | Lang::JAPANESE)) {
|
||||||
|
CJ_SEGMENTER.segment_str(p.bidi.text)
|
||||||
|
} else {
|
||||||
|
SEGMENTER.segment_str(p.bidi.text)
|
||||||
|
};
|
||||||
|
// The iterator always yields a breakpoint at index 0; we want to ignore it.
|
||||||
|
linebreaks.next();
|
||||||
Breakpoints {
|
Breakpoints {
|
||||||
p,
|
p,
|
||||||
linebreaks: LineBreakIterator::new(p.bidi.text),
|
linebreaks,
|
||||||
syllables: None,
|
syllables: None,
|
||||||
offset: 0,
|
offset: 0,
|
||||||
suffix: 0,
|
suffix: 0,
|
||||||
@ -1020,7 +1075,7 @@ struct Breakpoints<'a> {
|
|||||||
/// The paragraph's items.
|
/// The paragraph's items.
|
||||||
p: &'a Preparation<'a>,
|
p: &'a Preparation<'a>,
|
||||||
/// The inner iterator over the unicode line break opportunities.
|
/// The inner iterator over the unicode line break opportunities.
|
||||||
linebreaks: LineBreakIterator<'a>,
|
linebreaks: LineBreakIteratorUtf8<'a, 'a>,
|
||||||
/// Iterator over syllables of the current word.
|
/// Iterator over syllables of the current word.
|
||||||
syllables: Option<hypher::Syllables<'a>>,
|
syllables: Option<hypher::Syllables<'a>>,
|
||||||
/// The current text offset.
|
/// The current text offset.
|
||||||
@ -1054,8 +1109,20 @@ impl Iterator for Breakpoints<'_> {
|
|||||||
return Some((self.offset, self.mandatory && !hyphen, hyphen));
|
return Some((self.offset, self.mandatory && !hyphen, hyphen));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let lb = LINEBREAK_DATA.as_borrowed();
|
||||||
|
|
||||||
// Get the next "word".
|
// Get the next "word".
|
||||||
(self.end, self.mandatory) = self.linebreaks.next()?;
|
self.end = self.linebreaks.next()?;
|
||||||
|
self.mandatory =
|
||||||
|
self.p.bidi.text[..self.end].chars().next_back().map_or(false, |c| {
|
||||||
|
matches!(
|
||||||
|
lb.get(c),
|
||||||
|
LineBreak::MandatoryBreak
|
||||||
|
| LineBreak::CarriageReturn
|
||||||
|
| LineBreak::LineFeed
|
||||||
|
| LineBreak::NextLine
|
||||||
|
) || self.end == self.p.bidi.text.len()
|
||||||
|
});
|
||||||
|
|
||||||
// Hyphenate the next word.
|
// Hyphenate the next word.
|
||||||
if self.p.hyphenate != Some(false) {
|
if self.p.hyphenate != Some(false) {
|
||||||
|
@ -524,6 +524,7 @@ impl Lang {
|
|||||||
pub const FRENCH: Self = Self(*b"fr ", 2);
|
pub const FRENCH: Self = Self(*b"fr ", 2);
|
||||||
pub const GERMAN: Self = Self(*b"de ", 2);
|
pub const GERMAN: Self = Self(*b"de ", 2);
|
||||||
pub const ITALIAN: Self = Self(*b"it ", 2);
|
pub const ITALIAN: Self = Self(*b"it ", 2);
|
||||||
|
pub const JAPANESE: Self = Self(*b"ja ", 2);
|
||||||
pub const NYNORSK: Self = Self(*b"nn ", 2);
|
pub const NYNORSK: Self = Self(*b"nn ", 2);
|
||||||
pub const POLISH: Self = Self(*b"pl ", 2);
|
pub const POLISH: Self = Self(*b"pl ", 2);
|
||||||
pub const PORTUGUESE: Self = Self(*b"pt ", 2);
|
pub const PORTUGUESE: Self = Self(*b"pt ", 2);
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 21 KiB After Width: | Height: | Size: 38 KiB |
@ -43,7 +43,7 @@
|
|||||||
#set text(font: "Noto Serif CJK SC", lang: "zh")
|
#set text(font: "Noto Serif CJK SC", lang: "zh")
|
||||||
#set par(justify: true)
|
#set par(justify: true)
|
||||||
|
|
||||||
孔雀最早见于《山海经》中的《海内经》:\u{200b}“有孔雀。”东汉杨孚著《异物志》记载,岭南:“孔雀,其大如大雁而足高,毛皆有斑纹彩,捕而蓄之,拍手即舞。”
|
孔雀最早见于《山海经》中的《海内经》:“有孔雀。”东汉杨孚著《异物志》记载,岭南:“孔雀,其大如大雁而足高,毛皆有斑纹彩,捕而蓄之,拍手即舞。”
|
||||||
|
|
||||||
#set text(font: "Noto Serif CJK TC", lang: "zh", region: "hk")
|
#set text(font: "Noto Serif CJK TC", lang: "zh", region: "hk")
|
||||||
孔雀最早见于《山海经》中的《海内经》:「有孔雀。」东汉杨孚著《异物志》记载,岭南:「孔雀,其大如大雁而足高,毛皆有斑纹彩,捕而蓄之,拍手即舞。」
|
孔雀最早见于《山海经》中的《海内经》:「有孔雀。」东汉杨孚著《异物志》记载,岭南:「孔雀,其大如大雁而足高,毛皆有斑纹彩,捕而蓄之,拍手即舞。」
|
||||||
|
@ -43,3 +43,9 @@ Second part
|
|||||||
// Test comments at the end of a line with pre-spacing
|
// Test comments at the end of a line with pre-spacing
|
||||||
First part //
|
First part //
|
||||||
Second part
|
Second part
|
||||||
|
|
||||||
|
---
|
||||||
|
// Test line breaking for Southeast Asian languages (Thai), which need dictionary/LSTM-based segmentation
|
||||||
|
ทีวีตรวจทานนอร์ทแฟรีเลคเชอร์โกลด์อัลบัมเชอร์รี่เย้วสโตร์กฤษณ์เคลมเยอบีร่าพ่อค้าบลูเบอร์รี่สหัสวรรษโฮปแคนูโยโย่จูนสตรอว์เบอร์รีซื่อบื้อเยนแบ็กโฮเป็นไงโดนัททอมสเตริโอแคนูวิทย์แดรี่โดนัทวิทย์แอปพริคอทเซอร์ไพรส์ไฮบริดกิฟท์อินเตอร์โซนเซอร์วิสเทียมทานโคโยตี้ม็อบเที่ยงคืนบุญคุณ
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user