Unicode Break Algorithms
(require unicode-breaks) | package: unicode-breaks |
Racket 8.7 added basic support for working with Unicode grapheme clusters, where multiple codepoints make up an entity that is rendered as a single character. This module expands that functionality, and adds word and sentence breaks from Unicode Annex #29, Text Segmentation. It does not attempt to provide language- or locale-specific algorithms.
The rules used are in accordance with Unicode 15.1, to match Racket 8.13.
1 Grapheme Breaks
procedure
(in-graphemes str [start end]) → (sequence/c string?)
str : string? start : exact-nonnegative-integer? = 0 end : exact-nonnegative-integer? = (string-length str)
procedure
(string-split-graphemes str [start end]) → (listof string?)
str : string? start : exact-nonnegative-integer? = 0 end : exact-nonnegative-integer? = (string-length str)
procedure
(string-split-graphemes/immutable str [ start end]) → (listof (and/c string? immutable?)) str : string? start : exact-nonnegative-integer? = 0 end : exact-nonnegative-integer? = (string-length str)
procedure
(string-grapheme-indexes str [start end])
→ (listof exact-nonnegative-integer?) str : string? start : exact-nonnegative-integer? = 0 end : exact-nonnegative-integer? = (string-length str)
2 Word Breaks
procedure
(char-word-break-property ch) → symbol?
ch : char?
procedure
(string-word-break-at? str i [start end]) → boolean?
str : string? i : exact-nonnegative-integer? start : exact-nonnegative-integer? = 0 end : exact-nonnegative-integer? = (string-length str)
procedure
(string-word-span str start [end]) → exact-nonnegative-integer?
str : string? start : exact-nonnegative-integer? end : exact-nonnegative-integer? = (string-length str)
procedure
(in-words str [ start end #:skip-blanks? skip-blanks?]) → (sequence/c string?) str : string? start : exact-nonnegative-integer? = 0 end : exact-nonnegative-integer? = (string-length str) skip-blanks? : any/c = #f
procedure
(string-split-words str [ start end #:skip-blanks? skip-blanks?]) → (listof string?) str : string? start : exact-nonnegative-integer? = 0 end : exact-nonnegative-integer? = (string-length str) skip-blanks? : any/c = #f
procedure
(string-split-words/immutable str [ start end #:skip-blanks? skip-blanks?]) → (listof (and/c string? immutable?)) str : string? start : exact-nonnegative-integer? = 0 end : exact-nonnegative-integer? = (string-length str) skip-blanks? : any/c = #f
procedure
(string-word-break-indexes str [start end])
→ (listof exact-nonnegative-integer?) str : string? start : exact-nonnegative-integer? = 0 end : exact-nonnegative-integer? = (string-length str)
3 Sentence Breaks
procedure
(char-sentence-break-property ch) → symbol?
ch : char?
procedure
(in-sentences str [start end]) → (sequence/c string?)
str : string? start : exact-nonnegative-integer? = 0 end : exact-nonnegative-integer? = (string-length str)
procedure
(string-split-sentences str [start end]) → (listof string?)
str : string? start : exact-nonnegative-integer? = 0 end : exact-nonnegative-integer? = (string-length str)
procedure
(string-split-sentences/immutable str [ start end]) → (listof (and/c string? immutable?)) str : string? start : exact-nonnegative-integer? = 0 end : exact-nonnegative-integer? = (string-length str)
procedure
(string-sentence-break-indexes str [start end])
→ (listof exact-nonnegative-integer?) str : string? start : exact-nonnegative-integer? = 0 end : exact-nonnegative-integer? = (string-length str)
4 Other functions
procedure
(char-east-asian-width-property ch) → (or/c 'N 'Na 'H 'A 'F 'W)
ch : char?