package core:unicode/utf8

⌘K
Ctrl+K
or
/

    Types

    Grapheme_Cluster_Sequence ¶

    Grapheme_Cluster_Sequence :: enum int {
    	None, 
    	Indic, 
    	Emoji, 
    	Regional, 
    }

    Grapheme_Iterator ¶

    Grapheme_Iterator :: struct {
    	str:                        string,
    	curr_offset:                int,
    	grapheme_count:             int,
    	// The number of graphemes in the string
    	rune_count:                 int,
    	// The number of runes in the string
    	width:                      int,
    	// The width of the string in number of monospace cells
    	last_rune:                  rune,
    	last_rune_breaks_forward:   bool,
    	last_width:                 int,
    	last_grapheme_count:        int,
    	bypass_next_rune:           bool,
    	regional_indicator_counter: int,
    	current_sequence:           Grapheme_Cluster_Sequence,
    	continue_sequence:          bool,
    }
    Related Procedures With Parameters
    Related Procedures With Returns

    RUNE_BOM ¶

    RUNE_BOM :: Grapheme
    Related Procedures With Returns

    Constants

    UTF_MAX ¶

    UTF_MAX :: 4

    HICB ¶

    HICB :: 0b1011_1111

    LOCB ¶

    LOCB :: 0b1000_0000
     

    The default lowest and highest continuation byte.

    MASK2 ¶

    MASK2 :: 0b0001_1111

    MASK3 ¶

    MASK3 :: 0b0000_1111

    MASK4 ¶

    MASK4 :: 0b0000_0111

    MASKX ¶

    MASKX :: 0b0011_1111

    MAX_RUNE ¶

    MAX_RUNE :: '\U0010ffff'

    RUNE1_MAX ¶

    RUNE1_MAX :: 1 << 7 - 1

    RUNE2_MAX ¶

    RUNE2_MAX :: 1 << 11 - 1

    RUNE3_MAX ¶

    RUNE3_MAX :: 1 << 16 - 1

    RUNE_BOM ¶

    RUNE_BOM :: 0xfeff

    RUNE_EOF ¶

    RUNE_EOF: rune : ~rune(0)

    RUNE_SELF ¶

    RUNE_SELF :: 0x80

    SURROGATE_LOW_MIN ¶

    SURROGATE_LOW_MIN :: 0xdc00

    SURROGATE_MAX ¶

    SURROGATE_MAX :: 0xdfff

    SURROGATE_MIN ¶

    SURROGATE_MIN :: 0xd800
     

    A high/leading surrogate is in the range SURROGATE_MIN..SURROGATE_HIGH_MAX; a low/trailing surrogate is in the range SURROGATE_LOW_MIN..SURROGATE_MAX.

    T1 ¶

    T1 :: 0b0000_0000

    T2 ¶

    T2 :: 0b1100_0000

    T3 ¶

    T3 :: 0b1110_0000

    T4 ¶

    T4 :: 0b1111_0000

    T5 ¶

    T5 :: 0b1111_1000

    TX ¶

    TX :: 0b1000_0000

    ZERO_WIDTH_JOINER ¶

    ZERO_WIDTH_JOINER :: unicode.ZERO_WIDTH_JOINER

    RUNE_ERROR ¶

    RUNE_ERROR :: '\ufffd'

    Variables

    accept_ranges ¶

    accept_ranges: [5]Accept_Range = …

    accept_sizes ¶

    accept_sizes: [256]u8 = …

    Procedures

    decode_grapheme_clusters ¶

    decode_grapheme_clusters :: proc(str: string, track_graphemes: bool = true, allocator := context.allocator) -> (graphemes: [dynamic]Grapheme, grapheme_count: int, rune_count: int, width: int) {…}
     

    Decode the individual graphemes in a UTF-8 string.

    Allocates Using Provided Allocator

    Inputs:
    str: The input string.
    track_graphemes: Whether or not to allocate and return graphemes with extra data about each grapheme.
    allocator: (default: context.allocator)

    Returns:
    graphemes: Extra data about each grapheme.
    grapheme_count: The number of graphemes in the string.
    rune_count: The number of runes in the string.
    width: The width of the string in number of monospace cells.

    decode_grapheme_iterate ¶

    decode_grapheme_iterate :: rune_count_in_bytes
    Related Procedure Groups

    decode_grapheme_iterator_make ¶

    decode_grapheme_iterator_make :: proc(str: string) -> (it: Grapheme_Iterator) {…}

    decode_last_rune_in_bytes ¶

    decode_last_rune_in_bytes :: proc "contextless" (s: []u8) -> (rune, int) {…}
    Related Procedure Groups

    decode_last_rune_in_string ¶

    decode_last_rune_in_string :: proc "contextless" (s: string) -> (rune, int) {…}
    Related Procedure Groups

    decode_rune_in_string ¶

    decode_rune_in_string :: proc "contextless" (s: string) -> (rune, int) {…}
    Related Procedure Groups

    encode_rune ¶

    encode_rune :: proc "contextless" (c: rune) -> ([4]u8, int) {…}

    full_rune_in_bytes ¶

    full_rune_in_bytes :: proc "contextless" (b: []u8) -> bool {…}
     

    full_rune_in_bytes reports whether the bytes in b begin with a full UTF-8 encoding of a rune. An invalid encoding is considered a full rune, since it will convert as an error rune of width 1 (RUNE_ERROR).

    Related Procedure Groups

    full_rune_in_string ¶

    full_rune_in_string :: proc "contextless" (s: string) -> bool {…}
     

    full_rune_in_string reports whether the string s begins with a full UTF-8 encoding of a rune. An invalid encoding is considered a full rune, since it will convert as an error rune of width 1 (RUNE_ERROR).

    Related Procedure Groups

    grapheme_count ¶

    grapheme_count :: proc(str: string) -> (graphemes, runes, width: int) {…}
     

    Count the individual graphemes in a UTF-8 string.

    Inputs:
    str: The input string.

    Returns:
    graphemes: The number of graphemes in the string.
    runes: The number of runes in the string.
    width: The width of the string in number of monospace cells.

    is_control ¶

    is_control :: rune_count_in_string
    Related Procedure Groups

    rune_offset ¶

    rune_offset :: proc "contextless" (s: string, pos: int, start: int = 0) -> int {…}
     

    Returns the byte position of rune at position pos in s with an optional start byte position. Returns -1 if it runs out of the string.

    is_hangul_syllable_vowel ¶

    is_hangul_syllable_vowel :: decode_rune_in_bytes
    Related Procedure Groups

    rune_at ¶

    rune_at :: proc "contextless" (s: string, byte_index: int) -> rune {…}

    rune_at_pos ¶

    rune_at_pos :: proc "contextless" (s: string, pos: int) -> rune {…}

    rune_size ¶

    rune_size :: proc "contextless" (r: rune) -> int {…}

    rune_start ¶

    rune_start :: proc "contextless" (b: u8) -> bool {…}

    rune_string_at_pos ¶

    rune_string_at_pos :: proc "contextless" (s: string, pos: int) -> string {…}

    string_to_runes ¶

    string_to_runes :: proc(s: string, allocator := context.allocator) -> (runes: []rune, err: runtime.Allocator_Error) #optional_ok {…}

    valid_rune ¶

    valid_rune :: proc "contextless" (r: rune) -> bool {…}

    valid_string ¶

    valid_string :: proc "contextless" (s: string) -> bool {…}

    Procedure Groups

    full_rune ¶

    full_rune :: proc{
    	full_rune_in_bytes,
    	full_rune_in_string,
    }
    
     

    full_rune reports whether the given bytes or string begin with a full UTF-8 encoding of a rune. An invalid encoding is considered a full rune, since it will convert as an error rune of width 1 (RUNE_ERROR).

    Source Files

    Generation Information

    Generated with odin version dev-2026-03 (vendor "odin") Windows_amd64 @ 2026-03-18 21:27:25.745718300 +0000 UTC