perf: combine token split into 1 pass (#427)
This commit is contained in:
@@ -27,15 +27,11 @@ export class Tokenizer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let tokens = this.tokenizeTokens(text, { skipChs: true })
|
let tokens = this.tokenizeTokens(text, { skipChs: true })
|
||||||
|
tokens = [...tokens.flatMap(token => [
|
||||||
// Split hyphenated tokens
|
token,
|
||||||
tokens = [...tokens, ...tokens.flatMap(splitHyphens)]
|
...splitHyphens(token),
|
||||||
|
...splitCamelCase(token),
|
||||||
// Split camelCase tokens into "camel" and "case"
|
]), ...words]
|
||||||
tokens = [...tokens, ...tokens.flatMap(splitCamelCase)]
|
|
||||||
|
|
||||||
// Add whole words (aka "not tokens")
|
|
||||||
tokens = [...tokens, ...words]
|
|
||||||
|
|
||||||
// Add urls
|
// Add urls
|
||||||
if (urls.length) {
|
if (urls.length) {
|
||||||
|
|||||||
Reference in New Issue
Block a user