Skip to content

Commit

Permalink
Add fallback from unicode decomposition
Browse files: browse the repository at this point in the history
  • Loading branch information
kornelski committed Dec 15, 2022
1 parent cdb2d59 commit 3de7ad0
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 8 deletions.
1 change: 1 addition & 0 deletions scripts/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ serde_json = "1.0.48"
serde_derive = "1.0.104"
any_ascii = "0.2.0"
emojis = "0.4.0"
unicode-normalization = "0.1.22"
25 changes: 17 additions & 8 deletions scripts/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,24 +123,33 @@ fn main() {
}
}

for (i, ch) in all_codepoints.iter_mut().enumerate().skip(255) {
if *ch == UNKNOWN_CHAR {
let any = std::char::from_u32(i as u32)
.map(any_ascii::any_ascii_char)
.unwrap_or("")
.trim_matches(':');
for i in 255..all_codepoints.len() {
let Some(codepoint) = std::char::from_u32(i as u32) else { continue; };
let ch = all_codepoints[i];
if ch == UNKNOWN_CHAR {
let any = any_ascii::any_ascii_char(codepoint).trim_matches(':');
if any != "" {
// we use spaces instead of underscores in emoji
*ch = if any.chars().any(|c| c.is_alphabetic()) && any.chars().any(|c| c == '_') {
all_codepoints[i] = if any.chars().any(|c| c.is_alphabetic()) && any.chars().any(|c| c == '_') {
let ch: String = any.chars().map(|c| if c == '_' {' '} else {c}).collect();
Box::leak(ch.into_boxed_str())
} else {
any
};
} else {
let mut s = String::new();
let mut changed = false;
unicode_normalization::char::decompose_compatible(codepoint, |denorm| {
if denorm as usize != i { changed = true; }
all_codepoints.get(denorm as usize).map(|c| s.push_str(c));
});
if changed && !s.trim().is_empty() && s.bytes().all(|c| c < 255) {
all_codepoints[i] = Box::leak(s.into_boxed_str());
}
}
} else if ch.starts_with("[d") {
// clean up [d123]
*ch = ch.trim_start_matches('[').trim_end_matches(']');
all_codepoints[i] = ch.trim_start_matches('[').trim_end_matches(']');
};
}

Expand Down
Binary file modified src/mapping.txt
Binary file not shown.
Binary file modified src/pointers.bin
Binary file not shown.

0 comments on commit 3de7ad0

Please sign in to comment.