tokenizer
Source

⌘K

Ctrl+K

Filter Results

Overview

Example:

package demo

import tokenizer "core:c/frontend/tokenizer"
import preprocessor "core:c/frontend/preprocessor"
import "core:fmt"

// Demo entry point: tokenizes a C source file, runs the preprocessor over the
// resulting token list, and prints the literal text of each token up to EOF.
main :: proc() {
	// Tokenizer set up through init_defaults (default error/warning handlers).
	lexer := &tokenizer.Tokenizer{}
	tokenizer.init_defaults(lexer)

	// The preprocessor reuses the tokenizer's handlers, then gets its lookup
	// tables, default macros and include search path initialised.
	cpp := &preprocessor.Preprocessor{}
	cpp.warn = lexer.warn
	cpp.err  = lexer.err
	preprocessor.init_lookup_tables(cpp)
	preprocessor.init_default_macros(cpp)
	cpp.include_paths = {"my/path/to/include"}

	// Tokenize the file (file id 1) and feed the token list to the preprocessor.
	tokens := tokenizer.tokenize_file(lexer, "the/source/file.c", 1)
	tokens  = preprocessor.preprocess(cpp, tokens)

	// Walk the linked list of tokens until the EOF token is reached.
	if tokens != nil {
		for cur := tokens; cur.kind != .EOF; cur = cur.next {
			fmt.println(cur.lit)
		}
	}

	fmt.println("[Done]")
}

Index

Types (10)

Error_Handler
File
Hide_Set
Is_Keyword_Proc
Pos
Token
Token_Kind
Token_Type_Hint
Token_Value
Tokenizer

Constants (0)

This section is empty.

Variables (2)

default_keyword_set
token_name

Procedures (41)

add_hide_set
add_new_file
advance_rune
advance_rune_n
allow_next_to_be_newline
char_width
copy_token
default_error_handler
default_is_keyword
default_warn_handler
digit_val
display_width
error
error_offset
hide_set_contains
hide_set_intersection
hide_set_union
in_range
init_defaults
inline_tokenize
is_digit
is_ident0
is_ident1
new_eof
new_hide_set
peek
peek_str
scan
scan_comment
scan_escape
scan_identifier
scan_literal_prefix
scan_number
scan_punct
scan_rune
scan_string
skip_whitespace
tokenize
tokenize_file
warn
warn_offset

Procedure Groups (0)

This section is empty.

Types

Error_Handler ¶
Source

Error_Handler :: proc(pos: Pos, fmt: string, .. args: ..any)

Related Procedures With Parameters

init_defaults

File ¶
Source

File :: struct {
	name:         string,
	id:           int,
	src:          []u8,
	display_name: string,
	line_delta:   int,
}

Related Procedures With Parameters

Related Procedures With Returns

add_new_file

Hide_Set ¶
Source

Hide_Set :: struct {
	next: ^Hide_Set,
	name: string,
}

Related Procedures With Parameters

Related Procedures With Returns

new_hide_set

Is_Keyword_Proc ¶
Source

Is_Keyword_Proc :: proc(tok: ^Token) -> bool

Pos ¶
Source

Pos :: struct {
	file:   string,
	line:   int,
	column: int,
	offset: int,
}

Related Procedures With Parameters

Token ¶
Source

Token :: struct {
	kind:       Token_Kind,
	next:       ^Token,
	lit:        string,
	pos:        Pos,
	file:       ^File,
	line_delta: int,
	at_bol:     bool,
	has_space:  bool,
	type_hint:  Token_Type_Hint,
	val:        Token_Value,
	prefix:     string,
	// Preprocessor values
	hide_set:   ^Hide_Set,
	origin:     ^Token,
}

Related Procedures With Parameters

Related Procedures With Returns

Token_Kind ¶
Source

Token_Kind :: enum int {
	Invalid, 
	Ident, 
	Punct, 
	Keyword, 
	Char, 
	String, 
	Number, 
	PP_Number, 
	Comment, 
	EOF, 
}

Related Procedures With Returns

Token_Type_Hint ¶
Source

Token_Type_Hint :: enum u8 {
	None, 
	Int, 
	Long, 
	Long_Long, 
	Unsigned_Int, 
	Unsigned_Long, 
	Unsigned_Long_Long, 
	Float, 
	Double, 
	Long_Double, 
	UTF_8, 
	UTF_16, 
	UTF_32, 
	UTF_Wide, 
}

Token_Value ¶
Source

Token_Value :: union {
	i64, 
	f64, 
	string, 
	[]u16, 
	[]u32, 
}

Tokenizer ¶
Source

Tokenizer :: struct {
	// Immutable data
	path:          string,
	src:           []u8,
	// Tokenizing state
	ch:            rune,
	offset:        int,
	read_offset:   int,
	line_offset:   int,
	line_count:    int,
	// Extra information for tokens
	at_bol:        bool,
	has_space:     bool,
	// Mutable data
	err:           Error_Handler,
	warn:          Error_Handler,
	error_count:   int,
	warning_count: int,
}

Related Procedures With Parameters

Constants

This section is empty.

Variables

default_keyword_set ¶
Source

default_keyword_set: map[string]bool = …

token_name ¶
Source

token_name: [Token_Kind]string = …

Procedures

add_hide_set ¶
Source

add_hide_set :: proc(tok: ^Token, hs: ^Hide_Set) -> ^Token {…}

add_new_file ¶
Source

add_new_file :: proc(t: ^Tokenizer, name: string, src: []u8, id: int) -> ^File {…}

advance_rune ¶
Source

advance_rune :: proc(t: ^Tokenizer) {…}

advance_rune_n ¶
Source

advance_rune_n :: proc(t: ^Tokenizer, n: int) {…}

allow_next_to_be_newline ¶
Source

allow_next_to_be_newline :: proc(t: ^Tokenizer) -> bool {…}

char_width ¶
Source

char_width :: proc(c: rune) -> int {…}

Returns the number of columns needed to display a given character in a fixed-width font. Based on https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c

copy_token ¶
Source

copy_token :: proc(tok: ^Token) -> ^Token {…}

default_error_handler ¶
Source

default_error_handler :: proc(pos: Pos, msg: string, .. args: ..any) {…}

default_is_keyword ¶
Source

default_is_keyword :: proc(tok: ^Token) -> bool {…}

default_warn_handler ¶
Source

default_warn_handler :: proc(pos: Pos, msg: string, .. args: ..any) {…}

digit_val ¶
Source

digit_val :: proc(r: rune) -> int {…}

display_width ¶
Source

display_width :: proc(str: string) -> (w: int) {…}

error ¶
Source

error :: proc(t: ^Tokenizer, tok: ^Token, msg: string, .. args: ..any) {…}

error_offset ¶
Source

error_offset :: proc(t: ^Tokenizer, offset: int, msg: string, .. args: ..any) {…}

hide_set_contains ¶
Source

hide_set_contains :: proc(hs: ^Hide_Set, name: string) -> bool {…}

hide_set_intersection ¶
Source

hide_set_intersection :: proc(a, b: ^Hide_Set) -> ^Hide_Set {…}

hide_set_union ¶
Source

hide_set_union :: proc(a, b: ^Hide_Set) -> ^Hide_Set {…}

in_range ¶
Source

in_range :: proc(range: []rune, c: rune) -> bool {…}

init_defaults ¶
Source

init_defaults :: proc(t: ^Tokenizer, err: Error_Handler = default_error_handler, warn: Error_Handler = default_warn_handler) {…}

inline_tokenize ¶
Source

inline_tokenize :: proc(t: ^Tokenizer, tok: ^Token, src: []u8) -> ^Token {…}

is_digit ¶
Source

is_digit :: proc(r: rune) -> bool {…}

is_ident0 ¶
Source

is_ident0 :: proc(c: rune) -> bool {…}

[https://www.sigbus.info/n1570#D] C11 allows ASCII and some multibyte characters in certain Unicode ranges to be used in an identifier.

is_ident0 returns true if a given character is acceptable as the first character of an identifier.

is_ident1 ¶
Source

is_ident1 :: proc(c: rune) -> bool {…}

is_ident1 returns true if a given character is acceptable as a non-first character of an identifier.

new_eof ¶
Source

new_eof :: proc(tok: ^Token) -> ^Token {…}

new_hide_set ¶
Source

new_hide_set :: proc(name: string) -> ^Hide_Set {…}

peek ¶
Source

peek :: proc(t: ^Tokenizer) -> u8 {…}

peek_str ¶
Source

peek_str :: proc(t: ^Tokenizer, str: string) -> bool {…}

scan ¶
Source

scan :: proc(t: ^Tokenizer, f: ^File) -> ^Token {…}

scan_comment ¶
Source

scan_comment :: proc(t: ^Tokenizer) -> string {…}

scan_escape ¶
Source

scan_escape :: proc(t: ^Tokenizer) -> bool {…}

scan_identifier ¶
Source

scan_identifier :: proc(t: ^Tokenizer) -> string {…}

scan_literal_prefix ¶
Source

scan_literal_prefix :: proc(t: ^Tokenizer, str: string, prefix: ^string) -> bool {…}

scan_number ¶
Source

scan_number :: proc(t: ^Tokenizer, seen_decimal_point: bool) -> (Token_Kind, string) {…}

scan_punct ¶
Source

scan_punct :: proc(t: ^Tokenizer, ch: rune) -> (kind: Token_Kind) {…}

scan_rune ¶
Source

scan_rune :: proc(t: ^Tokenizer) -> string {…}

scan_string ¶
Source

scan_string :: proc(t: ^Tokenizer) -> string {…}

skip_whitespace ¶
Source

skip_whitespace :: proc(t: ^Tokenizer) {…}

tokenize ¶
Source

tokenize :: proc(t: ^Tokenizer, f: ^File) -> ^Token {…}

tokenize_file ¶
Source

tokenize_file :: proc(t: ^Tokenizer, path: string, id: int, loc := #caller_location) -> ^Token {…}

warn ¶
Source

warn :: proc(t: ^Tokenizer, tok: ^Token, msg: string, .. args: ..any) {…}

warn_offset ¶
Source

warn_offset :: proc(t: ^Tokenizer, offset: int, msg: string, .. args: ..any) {…}

Procedure Groups

This section is empty.

Source Files

Generation Information

Generated with odin version dev-2024-07 (vendor "odin") Windows_amd64 @ 2024-07-26 21:10:24.154085800 +0000 UTC

package core:c/frontend/tokenizer
Source

Overview

Index

Types

Related Procedures With Parameters

Related Procedures With Parameters

Related Procedures With Returns

Related Procedures With Parameters

Related Procedures With Returns

Related Procedures With Parameters

Related Procedures With Parameters

Related Procedures With Returns

Related Procedures With Returns

Related Procedures With Parameters

Constants

Variables

Procedures

Procedure Groups

Source Files

Generation Information

package core:c/frontend/tokenizer
Source