package core:c/frontend/tokenizer

⌘K
Ctrl+K
or
/

    Overview

    Example:
    package demo
    
    import tokenizer "core:c/frontend/tokenizer"
    import preprocessor "core:c/frontend/preprocessor"
    import "core:fmt"
    
    main :: proc() {
    	t := &tokenizer.Tokenizer{};
    	tokenizer.init_defaults(t);
    
    	cpp := &preprocessor.Preprocessor{};
    	cpp.warn, cpp.err = t.warn, t.err;
    	preprocessor.init_lookup_tables(cpp);
    	preprocessor.init_default_macros(cpp);
    	cpp.include_paths = {"my/path/to/include"};
    
    	tok := tokenizer.tokenize_file(t, "the/source/file.c", 1);
    
    	tok = preprocessor.preprocess(cpp, tok);
    	if tok != nil {
    		for t := tok; t.kind != .EOF; t = t.next {
    			fmt.println(t.lit);
    		}
    	}
    
    	fmt.println("[Done]");
    }
    

    Types

    Error_Handler ¶

    Error_Handler :: proc(pos: Pos, fmt: string, .. args: ..any)
    Related Procedures With Parameters

    File ¶

    File :: struct {
    	name:         string,
    	id:           int,
    	src:          []u8,
    	display_name: string,
    	line_delta:   int,
    }
    Related Procedures With Parameters
    Related Procedures With Returns

    Hide_Set ¶

    Hide_Set :: struct {
    	next: ^Hide_Set,
    	name: string,
    }
    Related Procedures With Parameters
    Related Procedures With Returns

    Is_Keyword_Proc ¶

    Is_Keyword_Proc :: proc(tok: ^Token) -> bool

    Pos ¶

    Pos :: struct {
    	file:   string,
    	line:   int,
    	column: int,
    	offset: int,
    }
    Related Procedures With Parameters

    Token ¶

    Token :: struct {
    	kind:       Token_Kind,
    	next:       ^Token,
    	lit:        string,
    	pos:        Pos,
    	file:       ^File,
    	line_delta: int,
    	at_bol:     bool,
    	has_space:  bool,
    	type_hint:  Token_Type_Hint,
    	val:        Token_Value,
    	prefix:     string,
    	// Preprocessor values
    	hide_set:   ^Hide_Set,
    	origin:     ^Token,
    }
    Related Procedures With Parameters
    Related Procedures With Returns

    Token_Kind ¶

    Token_Kind :: enum int {
    	Invalid, 
    	Ident, 
    	Punct, 
    	Keyword, 
    	Char, 
    	String, 
    	Number, 
    	PP_Number, 
    	Comment, 
    	EOF, 
    }
    Related Procedures With Returns

    Token_Type_Hint ¶

    Token_Type_Hint :: enum u8 {
    	None, 
    	Int, 
    	Long, 
    	Long_Long, 
    	Unsigned_Int, 
    	Unsigned_Long, 
    	Unsigned_Long_Long, 
    	Float, 
    	Double, 
    	Long_Double, 
    	UTF_8, 
    	UTF_16, 
    	UTF_32, 
    	UTF_Wide, 
    }

    Token_Value ¶

    Token_Value :: union {
    	i64, 
    	f64, 
    	string, 
    	[]u16, 
    	[]u32, 
    }

    Tokenizer ¶

    Tokenizer :: struct {
    	// Immutable data
    	path:          string,
    	src:           []u8,
    	// Tokenizing state
    	ch:            rune,
    	offset:        int,
    	read_offset:   int,
    	line_offset:   int,
    	line_count:    int,
    	// Extra information for tokens
    	at_bol:        bool,
    	has_space:     bool,
    	// Mutable data
    	err:           Error_Handler,
    	warn:          Error_Handler,
    	error_count:   int,
    	warning_count: int,
    }
    Related Procedures With Parameters

    Constants

    This section is empty.

    Variables

    default_keyword_set ¶

    default_keyword_set: map[string]bool = …

    Procedures

    add_hide_set ¶

    add_hide_set :: proc(tok: ^Token, hs: ^Hide_Set) -> ^Token {…}

    add_new_file ¶

    add_new_file :: proc(t: ^Tokenizer, name: string, src: []u8, id: int) -> ^File {…}

    advance_rune ¶

    advance_rune :: proc(t: ^Tokenizer) {…}

    advance_rune_n ¶

    advance_rune_n :: proc(t: ^Tokenizer, n: int) {…}

    allow_next_to_be_newline ¶

    allow_next_to_be_newline :: proc(t: ^Tokenizer) -> bool {…}

    char_width ¶

    char_width :: proc(c: rune) -> int {…}
     

    Returns the number of columns needed to display a given character in a fixed-width font. Based on https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c

    copy_token ¶

    copy_token :: proc(tok: ^Token) -> ^Token {…}

    default_error_handler ¶

    default_error_handler :: proc(pos: Pos, msg: string, .. args: ..any) {…}

    default_is_keyword ¶

    default_is_keyword :: proc(tok: ^Token) -> bool {…}

    default_warn_handler ¶

    default_warn_handler :: proc(pos: Pos, msg: string, .. args: ..any) {…}

    digit_val ¶

    digit_val :: proc(r: rune) -> int {…}

    display_width ¶

    display_width :: proc(str: string) -> (w: int) {…}

    error ¶

    error :: proc(t: ^Tokenizer, tok: ^Token, msg: string, .. args: ..any) {…}

    error_offset ¶

    error_offset :: proc(t: ^Tokenizer, offset: int, msg: string, .. args: ..any) {…}

    hide_set_contains ¶

    hide_set_contains :: proc(hs: ^Hide_Set, name: string) -> bool {…}

    hide_set_intersection ¶

    hide_set_intersection :: proc(a, b: ^Hide_Set) -> ^Hide_Set {…}

    hide_set_union ¶

    hide_set_union :: proc(a, b: ^Hide_Set) -> ^Hide_Set {…}

    in_range ¶

    in_range :: proc(range: []rune, c: rune) -> bool {…}

    init_defaults ¶

    init_defaults :: proc(t: ^Tokenizer, err: Error_Handler = default_error_handler, warn: Error_Handler = default_warn_handler) {…}

    inline_tokenize ¶

    inline_tokenize :: proc(t: ^Tokenizer, tok: ^Token, src: []u8) -> ^Token {…}

    is_digit ¶

    is_digit :: proc(r: rune) -> bool {…}

    is_ident0 ¶

    is_ident0 :: proc(c: rune) -> bool {…}
     

    [https://www.sigbus.info/n1570#D] C11 allows ASCII and some multibyte characters in certain Unicode ranges to be used in an identifier.

    is_ident0 returns true if a given character is acceptable as the first character of an identifier.

    is_ident1 ¶

    is_ident1 :: proc(c: rune) -> bool {…}
     

    is_ident1 returns true if a given character is acceptable as a non-first character of an identifier.

    new_eof ¶

    new_eof :: proc(tok: ^Token) -> ^Token {…}

    new_hide_set ¶

    new_hide_set :: proc(name: string) -> ^Hide_Set {…}

    peek ¶

    peek :: proc(t: ^Tokenizer) -> u8 {…}

    peek_str ¶

    peek_str :: proc(t: ^Tokenizer, str: string) -> bool {…}

    scan ¶

    scan :: proc(t: ^Tokenizer, f: ^File) -> ^Token {…}

    scan_comment ¶

    scan_comment :: proc(t: ^Tokenizer) -> string {…}

    scan_escape ¶

    scan_escape :: proc(t: ^Tokenizer) -> bool {…}

    scan_identifier ¶

    scan_identifier :: proc(t: ^Tokenizer) -> string {…}

    scan_literal_prefix ¶

    scan_literal_prefix :: proc(t: ^Tokenizer, str: string, prefix: ^string) -> bool {…}

    scan_number ¶

    scan_number :: proc(t: ^Tokenizer, seen_decimal_point: bool) -> (Token_Kind, string) {…}

    scan_punct ¶

    scan_punct :: proc(t: ^Tokenizer, ch: rune) -> (kind: Token_Kind) {…}

    scan_rune ¶

    scan_rune :: proc(t: ^Tokenizer) -> string {…}

    scan_string ¶

    scan_string :: proc(t: ^Tokenizer) -> string {…}

    skip_whitespace ¶

    skip_whitespace :: proc(t: ^Tokenizer) {…}

    tokenize ¶

    tokenize :: proc(t: ^Tokenizer, f: ^File) -> ^Token {…}

    tokenize_file ¶

    tokenize_file :: proc(t: ^Tokenizer, path: string, id: int, loc := #caller_location) -> ^Token {…}

    warn ¶

    warn :: proc(t: ^Tokenizer, tok: ^Token, msg: string, .. args: ..any) {…}

    warn_offset ¶

    warn_offset :: proc(t: ^Tokenizer, offset: int, msg: string, .. args: ..any) {…}

    Procedure Groups

    This section is empty.

    Source Files

    Generation Information

    Generated with odin version dev-2024-09 (vendor "odin") Windows_amd64 @ 2024-09-17 21:11:34.247336500 +0000 UTC