package core:unicode/utf8
Source

⌘K

Ctrl+K

Filter Results

Index

Types (2)

Accept_Range
Grapheme

Constants (26)

HICB
LOCB
MASK2
MASK3
MASK4
MASKX
MAX_RUNE
RUNE1_MAX
RUNE2_MAX
RUNE3_MAX
RUNE_BOM
RUNE_EOF
RUNE_ERROR
RUNE_SELF
SURROGATE_HIGH_MAX
SURROGATE_LOW_MIN
SURROGATE_MAX
SURROGATE_MIN
T1
T2
T3
T4
T5
TX
UTF_MAX
ZERO_WIDTH_JOINER

Variables (2)

accept_ranges
accept_sizes

Procedures (21)

decode_grapheme_clusters
decode_last_rune_in_bytes
decode_last_rune_in_string
decode_rune_in_bytes
decode_rune_in_string
encode_rune
full_rune_in_bytes
full_rune_in_string
grapheme_count
rune_at
rune_at_pos
rune_count_in_bytes
rune_count_in_string
rune_offset
rune_size
rune_start
rune_string_at_pos
runes_to_string
string_to_runes
valid_rune
valid_string

Procedure Groups (4)

decode_last_rune
decode_rune
full_rune
rune_count

Types

Accept_Range ¶
Source

Accept_Range :: struct {
	lo: u8,
	hi: u8,
}

Grapheme ¶
Source

Grapheme :: struct {
	byte_index: int,
	rune_index: int,
	width:      int,
}

Constants

HICB ¶
Source

HICB :: 0b1011_1111

LOCB ¶
Source

LOCB :: 0b1000_0000

The default lowest and highest continuation byte.

MASK2 ¶
Source

MASK2 :: 0b0001_1111

MASK3 ¶
Source

MASK3 :: 0b0000_1111

MASK4 ¶
Source

MASK4 :: 0b0000_0111

MASKX ¶
Source

MASKX :: 0b0011_1111

MAX_RUNE ¶
Source

MAX_RUNE :: '\U0010ffff'

RUNE1_MAX ¶
Source

RUNE1_MAX :: 1 << 7 - 1

RUNE2_MAX ¶
Source

RUNE2_MAX :: 1 << 11 - 1

RUNE3_MAX ¶
Source

RUNE3_MAX :: 1 << 16 - 1

RUNE_BOM ¶
Source

RUNE_BOM :: 0xfeff

RUNE_EOF ¶
Source

RUNE_EOF: rune : ~rune(0)

RUNE_ERROR ¶
Source

RUNE_ERROR :: '\ufffd'

RUNE_SELF ¶
Source

RUNE_SELF :: 0x80

SURROGATE_HIGH_MAX ¶
Source

SURROGATE_HIGH_MAX :: 0xdbff

A high/leading surrogate is in the range SURROGATE_MIN..SURROGATE_HIGH_MAX; a low/trailing surrogate is in the range SURROGATE_LOW_MIN..SURROGATE_MAX.

SURROGATE_LOW_MIN ¶
Source

SURROGATE_LOW_MIN :: 0xdc00

SURROGATE_MAX ¶
Source

SURROGATE_MAX :: 0xdfff

SURROGATE_MIN ¶
Source

SURROGATE_MIN :: 0xd800

T1 ¶
Source

T1 :: 0b0000_0000

T2 ¶
Source

T2 :: 0b1100_0000

T3 ¶
Source

T3 :: 0b1110_0000

T4 ¶
Source

T4 :: 0b1111_0000

T5 ¶
Source

T5 :: 0b1111_1000

TX ¶
Source

TX :: 0b1000_0000

UTF_MAX ¶
Source

UTF_MAX :: 4

ZERO_WIDTH_JOINER ¶
Source

ZERO_WIDTH_JOINER :: unicode.ZERO_WIDTH_JOINER

Variables

accept_ranges ¶
Source

accept_ranges: [5]Accept_Range = …

accept_sizes ¶
Source

accept_sizes: [256]u8 = …

Procedures

decode_grapheme_clusters ¶
Source

decode_grapheme_clusters :: proc(str: string, track_graphemes: bool = true, allocator := context.allocator) -> (graphemes: [dynamic]Grapheme, grapheme_count: int, rune_count: int, width: int) {…}

Decode the individual graphemes in a UTF-8 string.

Allocates Using Provided Allocator

Inputs:
str: The input string.
track_graphemes: Whether or not to allocate and return graphemes with extra data about each grapheme.
allocator: (default: context.allocator)

Returns:
graphemes: Extra data about each grapheme.
grapheme_count: The number of graphemes in the string.
rune_count: The number of runes in the string.
width: The width of the string in number of monospace cells.

decode_last_rune_in_bytes ¶
Source

decode_last_rune_in_bytes :: proc "contextless" (s: []u8) -> (rune, int) {…}

decode_last_rune_in_string ¶
Source

decode_last_rune_in_string :: proc "contextless" (s: string) -> (rune, int) {…}

decode_rune_in_bytes ¶
Source

decode_rune_in_bytes :: proc "contextless" (s: []u8) -> (rune, int) {…}

decode_rune_in_string ¶
Source

decode_rune_in_string :: proc "contextless" (s: string) -> (rune, int) {…}

encode_rune ¶
Source

encode_rune :: proc "contextless" (c: rune) -> ([4]u8, int) {…}

full_rune_in_bytes ¶
Source

full_rune_in_bytes :: proc "contextless" (b: []u8) -> bool {…}

full_rune_in_bytes reports whether the bytes in b begin with a full UTF-8 encoding of a rune. An invalid encoding is considered a full rune, since it will convert as an error rune of width 1 (RUNE_ERROR).

full_rune_in_string ¶
Source

full_rune_in_string :: proc "contextless" (s: string) -> bool {…}

full_rune_in_string reports whether the bytes in s begin with a full UTF-8 encoding of a rune. An invalid encoding is considered a full rune, since it will convert as an error rune of width 1 (RUNE_ERROR).

grapheme_count ¶
Source

grapheme_count :: proc(str: string) -> (graphemes, runes, width: int) {…}

Count the individual graphemes in a UTF-8 string.

Inputs:
str: The input string.

Returns:
graphemes: The number of graphemes in the string.
runes: The number of runes in the string.
width: The width of the string in number of monospace cells.

rune_at ¶
Source

rune_at :: proc "contextless" (s: string, byte_index: int) -> rune {…}

rune_at_pos ¶
Source

rune_at_pos :: proc "contextless" (s: string, pos: int) -> rune {…}

rune_count_in_bytes ¶
Source

rune_count_in_bytes :: proc "contextless" (s: []u8) -> int {…}

rune_count_in_string ¶
Source

rune_count_in_string :: proc(s: string) -> int {…}

rune_offset ¶
Source

rune_offset :: proc "contextless" (s: string, pos: int, start: int = 0) -> int {…}

Returns the byte position of the rune at position pos in s, with an optional start byte position (start). Returns -1 if it runs out of the string.

rune_size ¶
Source

rune_size :: proc "contextless" (r: rune) -> int {…}

rune_start ¶
Source

rune_start :: proc "contextless" (b: u8) -> bool {…}

rune_string_at_pos ¶
Source

rune_string_at_pos :: proc "contextless" (s: string, pos: int) -> string {…}

runes_to_string ¶
Source

runes_to_string :: proc(runes: []rune, allocator := context.allocator) -> string {…}

string_to_runes ¶
Source

string_to_runes :: proc(s: string, allocator := context.allocator) -> (runes: []rune) {…}

valid_rune ¶
Source

valid_rune :: proc "contextless" (r: rune) -> bool {…}

valid_string ¶
Source

valid_string :: proc "contextless" (s: string) -> bool {…}

Procedure Groups

decode_last_rune ¶
Source

decode_last_rune :: proc{
	decode_last_rune_in_string,
	decode_last_rune_in_bytes,
}

decode_rune ¶
Source

decode_rune :: proc{
	decode_rune_in_string,
	decode_rune_in_bytes,
}

full_rune ¶
Source

full_rune :: proc{
	full_rune_in_bytes,
	full_rune_in_string,
}

full_rune reports whether the bytes in b begin with a full UTF-8 encoding of a rune. An invalid encoding is considered a full rune, since it will convert as an error rune of width 1 (RUNE_ERROR).

rune_count ¶
Source

rune_count :: proc{
	rune_count_in_string,
	rune_count_in_bytes,
}

Source Files

Generation Information

Generated with odin version dev-2024-07 (vendor "odin") Windows_amd64 @ 2024-07-26 21:10:25.151956100 +0000 UTC

package core:unicode/utf8
Source