gumbo-parser

Synopsis

Gumbo - A pure-C HTML5 parser, adapted to be installable via clib.

Version
0.1.0
Source
License
Apache License, Version 2.0

API

abandon_current_tag
acknowledge_self_closing_tag
add_codepoint_error
add_duplicate_attr_error
add_error
add_formatting_element
add_named_reference_error
add_no_digit_error
add_parse_error
adjust_foreign_attributes
adjust_mathml_attributes
adjust_svg_attributes
adoption_agency_algorithm
all_attributes_match
append_char_to_tag_buffer
append_char_to_temporary_buffer
append_comment_node
append_node
attribute_matches
attribute_matches_case_sensitive
clear_active_formatting_elements
clear_stack_to_table_body_context
clear_stack_to_table_context
clear_stack_to_table_row_context
clear_temporary_buffer
clone_node
close_current_cell
close_current_select
close_table
close_table_cell
compute_quirks_mode
consume_char_ref
consume_named_ref
consume_numeric_ref
copy_over_original_tag_text
copy_over_tag_buffer
count_formatting_elements_of_tag
create_element
create_element_from_token
create_node
destroy_node
doc_type_state_init
doctype_matches
emit_char
emit_char_ref
emit_comment
emit_current_char
emit_current_tag
emit_doctype
emit_eof
emit_replacement_char
emit_temporary_buffer
enlarge_vector_if_full
ensure_lowercase
find_last_anchor_index
find_last_newline
find_named_char_ref
find_next_newline
finish_attribute_name
finish_attribute_value
finish_doctype_public_id
finish_doctype_system_id
finish_parsing
finish_tag_name
finish_temporary_buffer
finish_token
foster_parent_element
free_wrapper
generate_implied_end_tags
get_appropriate_insertion_mode
get_char_token_type
get_current_node
get_current_template_insertion_mode
get_document_node
gumbo_add_error
gumbo_caret_diagnostic_to_string
gumbo_copy_stringz
gumbo_debug
gumbo_destroy_attribute
gumbo_destroy_errors
gumbo_destroy_node
gumbo_destroy_output
gumbo_error_destroy
gumbo_error_to_string
gumbo_get_attribute
gumbo_init_errors
gumbo_lex
gumbo_normalize_svg_tagname
gumbo_normalized_tagname
gumbo_parse
gumbo_parse_with_options
gumbo_parser_allocate
gumbo_parser_deallocate
gumbo_print_caret_diagnostic
gumbo_string_buffer_append_codepoint
gumbo_string_buffer_append_string
gumbo_string_buffer_destroy
gumbo_string_buffer_init
gumbo_string_buffer_reserve
gumbo_string_buffer_to_string
gumbo_string_copy
gumbo_string_equals
gumbo_string_equals_ignore_case
gumbo_tag_enum
gumbo_tag_from_original_text
gumbo_token_destroy
gumbo_tokenizer_set_is_current_node_foreign
gumbo_tokenizer_set_state
gumbo_tokenizer_state_destroy
gumbo_tokenizer_state_init
gumbo_vector_add
gumbo_vector_destroy
gumbo_vector_index_of
gumbo_vector_init
gumbo_vector_insert_at
gumbo_vector_pop
gumbo_vector_remove
gumbo_vector_remove_at
handle_after_after_body
handle_after_after_frameset
handle_after_attr_name_state
handle_after_attr_value_quoted_state
handle_after_body
handle_after_doctype_name_state
handle_after_doctype_public_id_state
handle_after_doctype_public_keyword_state
handle_after_doctype_system_id_state
handle_after_doctype_system_keyword_state
handle_after_frameset
handle_after_head
handle_attr_name_state
handle_attr_value_double_quoted_state
handle_attr_value_single_quoted_state
handle_attr_value_unquoted_state
handle_before_attr_name_state
handle_before_attr_value_state
handle_before_doctype_name_state
handle_before_doctype_public_id_state
handle_before_doctype_system_id_state
handle_before_head
handle_before_html
handle_between_doctype_public_system_id_state
handle_bogus_comment_state
handle_bogus_doctype_state
handle_cdata_state
handle_char_ref_in_attr_value_state
handle_char_ref_in_data_state
handle_char_ref_in_rcdata_state
handle_comment_end_bang_state
handle_comment_end_dash_state
handle_comment_end_state
handle_comment_start_dash_state
handle_comment_start_state
handle_comment_state
handle_data_state
handle_doctype_name_state
handle_doctype_public_id_double_quoted_state
handle_doctype_public_id_single_quoted_state
handle_doctype_state
handle_doctype_system_id_double_quoted_state
handle_doctype_system_id_single_quoted_state
handle_end_tag_open_state
handle_html_content
handle_in_body
handle_in_caption
handle_in_cell
handle_in_column_group
handle_in_foreign_content
handle_in_frameset
handle_in_head
handle_in_head_noscript
handle_in_row
handle_in_select
handle_in_select_in_table
handle_in_table
handle_in_table_body
handle_in_table_text
handle_in_template
handle_initial
handle_markup_declaration_state
handle_parser_error
handle_plaintext_state
handle_rawtext_end_tag_name_state
handle_rawtext_end_tag_open_state
handle_rawtext_lt_state
handle_rawtext_state
handle_rcdata_end_tag_name_state
handle_rcdata_end_tag_open_state
handle_rcdata_lt_state
handle_rcdata_state
handle_script_double_escaped_dash_dash_state
handle_script_double_escaped_dash_state
handle_script_double_escaped_end_state
handle_script_double_escaped_lt_state
handle_script_double_escaped_start_state
handle_script_double_escaped_state
handle_script_end_tag_name_state
handle_script_end_tag_open_state
handle_script_escaped_dash_dash_state
handle_script_escaped_dash_state
handle_script_escaped_end_tag_name_state
handle_script_escaped_end_tag_open_state
handle_script_escaped_lt_state
handle_script_escaped_start_dash_state
handle_script_escaped_start_state
handle_script_escaped_state
handle_script_lt_state
handle_script_state
handle_self_closing_start_tag_state
handle_tag_name_state
handle_tag_open_state
handle_text
handle_token
has_an_element_in_button_scope
has_an_element_in_list_scope
has_an_element_in_scope
has_an_element_in_scope_with_tagname
has_an_element_in_select_scope
has_an_element_in_specific_scope
has_an_element_in_table_scope
has_node_in_scope
ignore_token
implicitly_close_tags
initialize_tag_buffer
insert_element
insert_element_from_token
insert_element_of_tag_type
insert_foreign_element
insert_node
insert_text_token
is_alpha
is_appropriate_end_tag
is_html_integration_point
is_in_static_list
is_legal_attribute_char_next
is_mathml_integration_point
is_open_element
is_special_node
malloc_wrapper
mark_tag_state_as_empty
maybe_add_doctype_error
maybe_add_invalid_named_reference
maybe_emit_from_temporary_buffer
maybe_flush_text_node_buffer
maybe_implicitly_close_list_tag
maybe_implicitly_close_p_tag
maybe_replace_codepoint
maybe_resize_string_buffer
merge_attributes
new_document_node
node_tag_in
node_tag_is
output_init
parse_digit
parser_state_destroy
parser_state_init
pop_current_node
pop_template_insertion_mode
print_message
print_tag_stack
push_template_insertion_mode
read_char
reconstruct_active_formatting_elements
record_end_of_element
reinitialize_tag_buffer
remove_from_parent
reset_insertion_mode_appropriately
reset_tag_buffer_start_point
reset_token_start_point
run_generic_parsing_algorithm
set_frameset_not_ok
set_insertion_mode
start_new_tag
tag_in
tag_is
temporary_buffer_equals
token_has_attribute
update_position
utf8_is_invalid_code_point
utf8iterator_current
utf8iterator_fill_error_at_mark
utf8iterator_get_char_pointer
utf8iterator_get_position
utf8iterator_init
utf8iterator_mark
utf8iterator_maybe_consume_match
utf8iterator_next
utf8iterator_reset

Details

_NamespacedAttributeReplacement

_ReplacementEntry

_TextNodeBufferState

CharReplacement

GumboAttribute

The namespace for the attribute. This will usually be GUMBO_ATTR_NAMESPACE_NONE, but some XLink/XMLNS/XML attributes take special values, per the "adjust foreign attributes" step of the HTML5 tree-construction specification.

The name of the attribute. This is in a freshly-allocated buffer to deal with case-normalization, and is null-terminated.

The original text of the attribute name, as a pointer into the original source buffer.

The value of the attribute. This is in a freshly-allocated buffer to deal with unescaping, and is null-terminated. It does not include any quotes that surround the attribute. If the attribute has no value (for example, 'selected' on a checkbox), this will be an empty string.

The original text of the value of the attribute. This points into the original source buffer. It includes any quotes that surround the attribute, and you can look at original_value.data[0] and original_value.data[original_value.length - 1] to determine what the quote characters were. If the attribute has no value, this will be a 0-length string.

The starting position of the attribute name.

The ending position of the attribute name. This is not always derivable from the starting position of the value because of the possibility of whitespace around the = sign.

The starting position of the attribute value.

The ending position of the attribute value.

A struct representing a single attribute on an HTML tag. This is a name-value pair, but also includes information about source locations and original source text.


GumboDocument

An array of GumboNodes, containing the children of this element. This will normally consist of the <html> element and any comment nodes found. Pointers are owned.

Whether or not the document is in QuirksMode, as determined by the values in the GumboTokenDocType template.

Information specific to document nodes.


GumboElement

An array of GumboNodes, containing the children of this element. Pointers are owned.

The GumboTag enum for this element.

The GumboNamespaceEnum for this element.

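A GumboStringPiece pointing to the original tag text for this element, pointing directly into the source buffer. If the tag was inserted algorithmically (for example, <head> or <tbody> insertion), this will be a zero-length string.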

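A GumboStringPiece pointing to the original end tag text for this element. If the end tag was inserted algorithmically (for example, closing a self-closing tag), this will be a zero-length string.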

The source position for the start of the start tag.

The source position for the start of the end tag.

An array of GumboAttributes, containing the attributes for this tag in the order that they were parsed. Pointers are owned.

The struct used to represent all HTML elements. This contains information about the tag, attributes, and child nodes.


GumboInternalDuplicateAttrError

GumboInternalError

GumboInternalNode

The type of node that this is.

Pointer back to parent node. Not owned.

The index within the parent's children vector of this node.

A bitvector of flags containing information about why this element was inserted into the parse tree, including a variety of special parse situations.

The actual node data.

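A supertype for GumboElement and GumboText, so that one generic type can be used in lists of children and cast to the appropriate subtype as necessary.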


GumboInternalOptions

A memory allocator function. Default: malloc.

A memory deallocator function. Default: free.

An opaque object that's passed in as the first argument to all callbacks used by this library. Default: NULL.

The tab-stop size, for computing positions in source code that uses tabs. Default: 8.

Whether or not to stop parsing when the first error is encountered. Default: false.

The maximum number of errors before the parser stops recording them. This is provided so that if the page is totally borked, we don't completely fill up the errors vector and exhaust memory with useless redundant errors. Set to -1 to disable the limit. Default: -1

Input struct containing configuration options for the parser. These let you specify alternate memory managers, provide different error handling, etc. Use kGumboDefaultOptions for sensible defaults, and only set what you need.


GumboInternalOutput

Pointer to the document node. This is a GumboNode of type NODE_DOCUMENT that contains the entire document as its child.

Pointer to the root node. This is the <html> tag that forms the root of the document.

A list of errors that occurred during the parse. NOTE: In version 1.0 of this library, the API for errors hasn't been fully fleshed out and may change in the future. For this reason, the GumboError header isn't part of the public API. Contact us if you need errors reported so we can work out something appropriate for your use-case.

The output struct containing the results of the parse.


GumboInternalParser

GumboInternalParserError

GumboInternalParserState

GumboInternalTagState

GumboInternalToken

GumboInternalTokenDocType

GumboInternalTokenizerError

GumboInternalTokenizerState

GumboInternalTokenStartTag

GumboInternalUtf8Iterator

GumboSourcePosition

A struct representing a character position within the original text buffer. Line and column numbers are 1-based and offsets are 0-based, which matches how most editors and command-line tools work. Also, columns measure positions in terms of characters while offsets measure by bytes; this is because the offset field is often used to pull out a particular region of text (which in most languages that bind to C implies pointer arithmetic on a buffer of bytes), while the column field is often used to reference a particular column on a printable display, which nowadays is usually UTF-8.


GumboStringBuffer

GumboStringPiece

A pointer to the beginning of the string. NULL iff length == 0.

The length of the string fragment, in bytes. May be zero.

A struct representing a string or part of a string. Strings within the parser are represented by a char* and a length; the char* points into an existing data buffer owned by some other code (often the original input). GumboStringPieces are assumed (by convention) to be immutable, because they may share data. Use GumboStringBuffer if you need to construct a string.


GumboText

The text of this node, after entities have been parsed and decoded. For comment/cdata nodes, this does not include the comment delimiters.

The original text of this node, as a pointer into the original buffer. For comment/cdata nodes, this includes the comment delimiters.

The starting position of this node. This corresponds to the position of original_text, before entities are decoded.

The struct used to represent TEXT, CDATA, COMMENT, and WHITESPACE elements. This contains just a block of text and its position.


GumboVector

Data elements. This points to a dynamically-allocated array of capacity elements, each a void* to the element itself.

Number of elements currently in the vector.

Current array capacity.

A simple vector implementation. This stores a pointer to a data array and a length. All elements are stored as void*; client code must cast to the appropriate type. Overflows upon addition result in reallocation of the data array, with the size doubling to maintain O(1) amortized cost. Elements can be removed with gumbo_vector_pop, gumbo_vector_remove, and gumbo_vector_remove_at. Iteration can be done through inspecting the structure directly in a for-loop.


NamedCharRef

OneOrTwoCodepoints

abandon_current_tag ()
void
abandon_current_tag (GumboParser parser);

acknowledge_self_closing_tag ()
void
acknowledge_self_closing_tag(GumboParser parser);

add_codepoint_error ()
void
add_codepoint_error (struct  parser, Utf8Iterator input, GumboErrorType type, int codepoint);

add_duplicate_attr_error ()
void
add_duplicate_attr_error(GumboParser parser, const char * attr_name, int original_index, int new_index);

add_error ()
void
add_error           (Utf8Iterator iter, GumboErrorType type);

add_formatting_element ()
void
add_formatting_element(GumboParser parser, const  node);

add_named_reference_error ()
void
add_named_reference_error(struct  parser, Utf8Iterator input, GumboErrorType type, GumboStringPiece text);

add_no_digit_error ()
void
add_no_digit_error  (struct  parser, Utf8Iterator input);

add_parse_error ()
void
add_parse_error     (GumboParser parser, GumboErrorType type);

adjust_foreign_attributes ()
void
adjust_foreign_attributes(GumboParser parser, GumboToken token);

adjust_mathml_attributes ()
void
adjust_mathml_attributes(GumboParser parser, GumboToken token);

adjust_svg_attributes ()
void
adjust_svg_attributes(GumboParser parser, GumboToken token);

adoption_agency_algorithm ()
bool
adoption_agency_algorithm(GumboParser parser, GumboToken token, GumboTag closing_tag);

all_attributes_match ()
bool
all_attributes_match(const  attr1, const  attr2);

append_char_to_tag_buffer ()
void
append_char_to_tag_buffer(GumboParser parser, int codepoint, bool reinitilize_position_on_first);

append_char_to_temporary_buffer ()
void
append_char_to_temporary_buffer(GumboParser parser, int codepoint);

append_comment_node ()
void
append_comment_node (GumboParser parser, GumboNode node, const  token);

append_node ()
void
append_node         (GumboParser parser, GumboNode parent, GumboNode node);

attribute_matches ()
bool
attribute_matches   (const  attributes, const char * name, const char * value);

attribute_matches_case_sensitive ()
bool
attribute_matches_case_sensitive(const  attributes, const char * name, const char * value);

clear_active_formatting_elements ()
void
clear_active_formatting_elements(GumboParser parser);

clear_stack_to_table_body_context ()
void
clear_stack_to_table_body_context(GumboParser parser);

clear_stack_to_table_context ()
void
clear_stack_to_table_context(GumboParser parser);

clear_stack_to_table_row_context ()
void
clear_stack_to_table_row_context(GumboParser parser);

clear_temporary_buffer ()
void
clear_temporary_buffer(GumboParser parser);

clone_node ()
GumboNode
clone_node          (GumboParser parser, const  node, GumboParseFlags reason);

close_current_cell ()
bool
close_current_cell  (GumboParser parser, const  token);

close_current_select ()
void
close_current_select(GumboParser parser);

close_table ()
bool
close_table         (GumboParser parser);

close_table_cell ()
bool
close_table_cell    (GumboParser parser, const  token, GumboTag cell_tag);

compute_quirks_mode ()
GumboQuirksModeEnum
compute_quirks_mode (const  doctype);

consume_char_ref ()
bool
consume_char_ref    (struct  parser, struct  input, int additional_allowed_char, bool is_in_attribute, OneOrTwoCodepoints output);

consume_named_ref ()
bool
consume_named_ref   (struct  parser, Utf8Iterator input, bool is_in_attribute, OneOrTwoCodepoints output);

consume_numeric_ref ()
bool
consume_numeric_ref (struct  parser, Utf8Iterator input, int * output);

copy_over_original_tag_text ()
void
copy_over_original_tag_text(GumboParser parser, GumboStringPiece original_text, GumboSourcePosition start_pos, GumboSourcePosition end_pos);

copy_over_tag_buffer ()
void
copy_over_tag_buffer(GumboParser parser, const char ** output);

count_formatting_elements_of_tag ()
int
count_formatting_elements_of_tag(GumboParser parser, const  desired_node, int * earliest_matching_index);

create_element ()
GumboNode
create_element      (GumboParser parser, GumboTag tag);

create_element_from_token ()
GumboNode
create_element_from_token(GumboParser parser, GumboToken token, GumboNamespaceEnum tag_namespace);

create_node ()
GumboNode
create_node         (GumboParser parser, GumboNodeType type);

destroy_node ()
void
destroy_node        (GumboParser parser, GumboNode node);

doc_type_state_init ()
void
doc_type_state_init (GumboParser parser);

doctype_matches ()
bool
doctype_matches     (const  doctype, const  public_id, const  system_id, bool allow_missing_system_id);

emit_char ()
void
emit_char           (GumboParser parser, int c, GumboToken output);

emit_char_ref ()
StateResult
emit_char_ref       (GumboParser parser, int additional_allowed_char, bool is_in_attribute, GumboToken output);

emit_comment ()
StateResult
emit_comment        (GumboParser parser, GumboToken output);

emit_current_char ()
bool
emit_current_char   (GumboParser parser, GumboToken output);

emit_current_tag ()
StateResult
emit_current_tag    (GumboParser parser, GumboToken output);

emit_doctype ()
void
emit_doctype        (GumboParser parser, GumboToken output);

emit_eof ()
StateResult
emit_eof            (GumboParser parser, GumboToken output);

emit_replacement_char ()
StateResult
emit_replacement_char(GumboParser parser, GumboToken output);

emit_temporary_buffer ()
bool
emit_temporary_buffer(GumboParser parser, GumboToken output);

enlarge_vector_if_full ()
void
enlarge_vector_if_full(struct  parser, GumboVector vector);

ensure_lowercase ()
int
ensure_lowercase    (int c);

find_last_anchor_index ()
bool
find_last_anchor_index(GumboParser parser, int * anchor_index);

find_last_newline ()
const char *
find_last_newline   (const char * original_text, const char * error_location);

find_named_char_ref ()
const 
find_named_char_ref (Utf8Iterator input);

find_next_newline ()
const char *
find_next_newline   (const char * original_text, const char * error_location);

finish_attribute_name ()
bool
finish_attribute_name(GumboParser parser);

finish_attribute_value ()
void
finish_attribute_value(GumboParser parser);

finish_doctype_public_id ()
void
finish_doctype_public_id(GumboParser parser);

finish_doctype_system_id ()
void
finish_doctype_system_id(GumboParser parser);

finish_parsing ()
void
finish_parsing      (GumboParser parser);

finish_tag_name ()
void
finish_tag_name     (GumboParser parser);

finish_temporary_buffer ()
void
finish_temporary_buffer(GumboParser parser, const char ** output);

finish_token ()
void
finish_token        (GumboParser parser, GumboToken token);

foster_parent_element ()
void
foster_parent_element(GumboParser parser, GumboNode node);

free_wrapper ()
void
free_wrapper        (void * unused, void * ptr);

generate_implied_end_tags ()
void
generate_implied_end_tags(GumboParser parser, GumboTag exception);

get_appropriate_insertion_mode ()
GumboInsertionMode
get_appropriate_insertion_mode(const  node, bool is_last);

get_char_token_type ()
GumboTokenType
get_char_token_type (int c);

get_current_node ()
GumboNode
get_current_node    (GumboParser parser);

get_current_template_insertion_mode ()
GumboInsertionMode
get_current_template_insertion_mode(GumboParser parser);

get_document_node ()
GumboNode
get_document_node   (GumboParser parser);

gumbo_add_error ()
GumboError
gumbo_add_error     (struct  parser);

gumbo_caret_diagnostic_to_string ()
void
gumbo_caret_diagnostic_to_string(struct  parser, const  error, const char * source_text, GumboStringBuffer output);

gumbo_copy_stringz ()
char *
gumbo_copy_stringz  (struct  parser, const char * str);

gumbo_debug ()
void
gumbo_debug         (const char * format, ... );

gumbo_destroy_attribute ()
void
gumbo_destroy_attribute(struct  parser, GumboAttribute attribute);

gumbo_destroy_errors ()
void
gumbo_destroy_errors(struct  errors);

gumbo_destroy_node ()
void
gumbo_destroy_node  (GumboOptions options, GumboNode node);

gumbo_destroy_output ()
void
gumbo_destroy_output(const GumboOptions *options, GumboOutput *output);

Release the memory used for the parse tree & parse errors.


gumbo_error_destroy ()
void
gumbo_error_destroy (struct  parser, GumboError error);

gumbo_error_to_string ()
void
gumbo_error_to_string(struct  parser, const  error, GumboStringBuffer output);

gumbo_get_attribute ()
GumboAttribute
gumbo_get_attribute (const GumboVector *attrs, const char * name);

Given a vector of GumboAttributes, look up the one with the specified name and return it, or NULL if no such attribute exists. This uses a case-insensitive match, as HTML is case-insensitive.


gumbo_init_errors ()
void
gumbo_init_errors   (struct  errors);

gumbo_lex ()
bool
gumbo_lex           (struct  parser, GumboToken output);

gumbo_normalize_svg_tagname ()
const char *
gumbo_normalize_svg_tagname(const GumboStringPiece *tagname);

Fixes the case of SVG elements that are not all lowercase.


gumbo_normalized_tagname ()
const char *
gumbo_normalized_tagname(GumboTag tag);

Returns the normalized (usually all-lowercased, except for foreign content) tag name for a GumboTag enum. Return value is static data owned by the library.


gumbo_parse ()
GumboOutput
gumbo_parse         (const char * buffer);

Parses a buffer of UTF-8 text into a GumboNode parse tree. The buffer must live at least as long as the parse tree, as some fields (e.g. original_text) point directly into the original buffer.

This doesn't support buffers longer than 4 gigabytes.


gumbo_parse_with_options ()
GumboOutput
gumbo_parse_with_options(const GumboOptions *options, const char * buffer, size_t buffer_length);

Extended version of gumbo_parse that takes an explicit options structure, buffer, and length.


gumbo_parser_allocate ()
void *
gumbo_parser_allocate(struct  parser, size_t num_bytes);

gumbo_parser_deallocate ()
void
gumbo_parser_deallocate(struct  parser, void * ptr);

gumbo_print_caret_diagnostic ()
void
gumbo_print_caret_diagnostic(struct  parser, const  error, const char * source_text);

gumbo_string_buffer_append_codepoint ()
void
gumbo_string_buffer_append_codepoint(struct  parser, int c, GumboStringBuffer output);

gumbo_string_buffer_append_string ()
void
gumbo_string_buffer_append_string(struct  parser, GumboStringPiece str, GumboStringBuffer output);

gumbo_string_buffer_destroy ()
void
gumbo_string_buffer_destroy(struct  parser, GumboStringBuffer buffer);

gumbo_string_buffer_init ()
void
gumbo_string_buffer_init(struct  parser, GumboStringBuffer output);

gumbo_string_buffer_reserve ()
void
gumbo_string_buffer_reserve(struct  parser, size_t min_capacity, GumboStringBuffer output);

gumbo_string_buffer_to_string ()
char *
gumbo_string_buffer_to_string(struct  parser, GumboStringBuffer input);

gumbo_string_copy ()
void
gumbo_string_copy   (struct  parser, GumboStringPiece dest, const  source);

gumbo_string_equals ()
bool
gumbo_string_equals (const GumboStringPiece *str1, const GumboStringPiece *str2);

Compares two GumboStringPieces, and returns true if they're equal or false otherwise.


gumbo_string_equals_ignore_case ()
bool
gumbo_string_equals_ignore_case(const GumboStringPiece *str1, const GumboStringPiece *str2);

Compares two GumboStringPieces ignoring case, and returns true if they're equal or false otherwise.


gumbo_tag_enum ()
GumboTag
gumbo_tag_enum      (const char * tagname);

Converts a tag name string (which may be in upper or mixed case) to a tag enum.


gumbo_tag_from_original_text ()
void
gumbo_tag_from_original_text(GumboStringPiece text);

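Extracts the tag name from the original_text field of an element or token by stripping off </> characters and attributes and adjusting the passed-in GumboStringPiece appropriately. The tag name is in the original case and shares a buffer with the original text, to simplify memory management. Behavior is undefined if a string piece that doesn't represent an HTML tag (<tagname> or </tagname>) is passed in. If the string piece is completely empty (NULL data pointer), this function exits successfully as a no-op.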


gumbo_token_destroy ()
void
gumbo_token_destroy (struct  parser, GumboToken token);

gumbo_tokenizer_set_is_current_node_foreign ()
void
gumbo_tokenizer_set_is_current_node_foreign(struct  parser, bool is_foreign);

gumbo_tokenizer_set_state ()
void
gumbo_tokenizer_set_state(struct  parser, GumboTokenizerEnum state);

gumbo_tokenizer_state_destroy ()
void
gumbo_tokenizer_state_destroy(struct  parser);

gumbo_tokenizer_state_init ()
void
gumbo_tokenizer_state_init(struct  parser, const char * text, size_t text_length);

gumbo_vector_add ()
void
gumbo_vector_add    (struct  parser, void * element, GumboVector vector);

gumbo_vector_destroy ()
void
gumbo_vector_destroy(struct  parser, GumboVector vector);

gumbo_vector_index_of ()
int
gumbo_vector_index_of(GumboVector vector, void * element);

Returns the first index at which an element appears in this vector (testing by pointer equality), or -1 if it never does.


gumbo_vector_init ()
void
gumbo_vector_init   (struct  parser, size_t initial_capacity, GumboVector vector);

gumbo_vector_insert_at ()
void
gumbo_vector_insert_at(struct  parser, void * element, int index, GumboVector vector);

gumbo_vector_pop ()
void *
gumbo_vector_pop    (struct  parser, GumboVector vector);

gumbo_vector_remove ()
void
gumbo_vector_remove (struct  parser, void * element, GumboVector vector);

gumbo_vector_remove_at ()
void *
gumbo_vector_remove_at(struct  parser, int index, GumboVector vector);

handle_after_after_body ()
bool
handle_after_after_body(GumboParser parser, GumboToken token);

handle_after_after_frameset ()
bool
handle_after_after_frameset(GumboParser parser, GumboToken token);

handle_after_attr_name_state ()
StateResult
handle_after_attr_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_after_attr_value_quoted_state ()
StateResult
handle_after_attr_value_quoted_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_after_body ()
bool
handle_after_body   (GumboParser parser, GumboToken token);

handle_after_doctype_name_state ()
StateResult
handle_after_doctype_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_after_doctype_public_id_state ()
StateResult
handle_after_doctype_public_id_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_after_doctype_public_keyword_state ()
StateResult
handle_after_doctype_public_keyword_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_after_doctype_system_id_state ()
StateResult
handle_after_doctype_system_id_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_after_doctype_system_keyword_state ()
StateResult
handle_after_doctype_system_keyword_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_after_frameset ()
bool
handle_after_frameset(GumboParser parser, GumboToken token);

handle_after_head ()
bool
handle_after_head   (GumboParser parser, GumboToken token);

handle_attr_name_state ()
StateResult
handle_attr_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_attr_value_double_quoted_state ()
StateResult
handle_attr_value_double_quoted_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_attr_value_single_quoted_state ()
StateResult
handle_attr_value_single_quoted_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_attr_value_unquoted_state ()
StateResult
handle_attr_value_unquoted_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_before_attr_name_state ()
StateResult
handle_before_attr_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_before_attr_value_state ()
StateResult
handle_before_attr_value_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_before_doctype_name_state ()
StateResult
handle_before_doctype_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_before_doctype_public_id_state ()
StateResult
handle_before_doctype_public_id_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_before_doctype_system_id_state ()
StateResult
handle_before_doctype_system_id_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_before_head ()
bool
handle_before_head  (GumboParser parser, GumboToken token);

handle_before_html ()
bool
handle_before_html  (GumboParser parser, GumboToken token);

handle_between_doctype_public_system_id_state ()
StateResult
handle_between_doctype_public_system_id_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_bogus_comment_state ()
StateResult
handle_bogus_comment_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_bogus_doctype_state ()
StateResult
handle_bogus_doctype_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_cdata_state ()
StateResult
handle_cdata_state  (GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_char_ref_in_attr_value_state ()
StateResult
handle_char_ref_in_attr_value_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_char_ref_in_data_state ()
StateResult
handle_char_ref_in_data_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_char_ref_in_rcdata_state ()
StateResult
handle_char_ref_in_rcdata_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_comment_end_bang_state ()
StateResult
handle_comment_end_bang_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_comment_end_dash_state ()
StateResult
handle_comment_end_dash_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_comment_end_state ()
StateResult
handle_comment_end_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_comment_start_dash_state ()
StateResult
handle_comment_start_dash_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_comment_start_state ()
StateResult
handle_comment_start_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_comment_state ()
StateResult
handle_comment_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_data_state ()
StateResult
handle_data_state   (GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_doctype_name_state ()
StateResult
handle_doctype_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_doctype_public_id_double_quoted_state ()
StateResult
handle_doctype_public_id_double_quoted_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_doctype_public_id_single_quoted_state ()
StateResult
handle_doctype_public_id_single_quoted_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_doctype_state ()
StateResult
handle_doctype_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_doctype_system_id_double_quoted_state ()
StateResult
handle_doctype_system_id_double_quoted_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_doctype_system_id_single_quoted_state ()
StateResult
handle_doctype_system_id_single_quoted_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_end_tag_open_state ()
StateResult
handle_end_tag_open_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_html_content ()
bool
handle_html_content (GumboParser parser, GumboToken token);

handle_in_body ()
bool
handle_in_body      (GumboParser parser, GumboToken token);

handle_in_caption ()
bool
handle_in_caption   (GumboParser parser, GumboToken token);

handle_in_cell ()
bool
handle_in_cell      (GumboParser parser, GumboToken token);

handle_in_column_group ()
bool
handle_in_column_group(GumboParser parser, GumboToken token);

handle_in_foreign_content ()
bool
handle_in_foreign_content(GumboParser parser, GumboToken token);

handle_in_frameset ()
bool
handle_in_frameset  (GumboParser parser, GumboToken token);

handle_in_head ()
bool
handle_in_head      (GumboParser parser, GumboToken token);

handle_in_head_noscript ()
bool
handle_in_head_noscript(GumboParser parser, GumboToken token);

handle_in_row ()
bool
handle_in_row       (GumboParser parser, GumboToken token);

handle_in_select ()
bool
handle_in_select    (GumboParser parser, GumboToken token);

handle_in_select_in_table ()
bool
handle_in_select_in_table(GumboParser parser, GumboToken token);

handle_in_table ()
bool
handle_in_table     (GumboParser parser, GumboToken token);

handle_in_table_body ()
bool
handle_in_table_body(GumboParser parser, GumboToken token);

handle_in_table_text ()
bool
handle_in_table_text(GumboParser parser, GumboToken token);

handle_in_template ()
bool
handle_in_template  (GumboParser parser, GumboToken token);

handle_initial ()
bool
handle_initial      (GumboParser parser, GumboToken token);

handle_markup_declaration_state ()
StateResult
handle_markup_declaration_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_parser_error ()
void
handle_parser_error (GumboParser parser, const  error, GumboStringBuffer output);

handle_plaintext_state ()
StateResult
handle_plaintext_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_rawtext_end_tag_name_state ()
StateResult
handle_rawtext_end_tag_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_rawtext_end_tag_open_state ()
StateResult
handle_rawtext_end_tag_open_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_rawtext_lt_state ()
StateResult
handle_rawtext_lt_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_rawtext_state ()
StateResult
handle_rawtext_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_rcdata_end_tag_name_state ()
StateResult
handle_rcdata_end_tag_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_rcdata_end_tag_open_state ()
StateResult
handle_rcdata_end_tag_open_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_rcdata_lt_state ()
StateResult
handle_rcdata_lt_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_rcdata_state ()
StateResult
handle_rcdata_state (GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_script_double_escaped_dash_dash_state ()
StateResult
handle_script_double_escaped_dash_dash_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_script_double_escaped_dash_state ()
StateResult
handle_script_double_escaped_dash_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_script_double_escaped_end_state ()
StateResult
handle_script_double_escaped_end_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_script_double_escaped_lt_state ()
StateResult
handle_script_double_escaped_lt_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_script_double_escaped_start_state ()
StateResult
handle_script_double_escaped_start_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_script_double_escaped_state ()
StateResult
handle_script_double_escaped_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_script_end_tag_name_state ()
StateResult
handle_script_end_tag_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_script_end_tag_open_state ()
StateResult
handle_script_end_tag_open_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_script_escaped_dash_dash_state ()
StateResult
handle_script_escaped_dash_dash_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_script_escaped_dash_state ()
StateResult
handle_script_escaped_dash_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_script_escaped_end_tag_name_state ()
StateResult
handle_script_escaped_end_tag_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_script_escaped_end_tag_open_state ()
StateResult
handle_script_escaped_end_tag_open_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_script_escaped_lt_state ()
StateResult
handle_script_escaped_lt_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_script_escaped_start_dash_state ()
StateResult
handle_script_escaped_start_dash_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_script_escaped_start_state ()
StateResult
handle_script_escaped_start_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_script_escaped_state ()
StateResult
handle_script_escaped_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_script_lt_state ()
StateResult
handle_script_lt_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_script_state ()
StateResult
handle_script_state (GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_self_closing_start_tag_state ()
StateResult
handle_self_closing_start_tag_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_tag_name_state ()
StateResult
handle_tag_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_tag_open_state ()
StateResult
handle_tag_open_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);

handle_text ()
bool
handle_text         (GumboParser parser, GumboToken token);

handle_token ()
bool
handle_token        (GumboParser parser, GumboToken token);

has_an_element_in_button_scope ()
bool
has_an_element_in_button_scope(GumboParser parser, GumboTag tag);

has_an_element_in_list_scope ()
bool
has_an_element_in_list_scope(GumboParser parser, GumboTag tag);

has_an_element_in_scope ()
bool
has_an_element_in_scope(GumboParser parser, GumboTag tag);

has_an_element_in_scope_with_tagname ()
bool
has_an_element_in_scope_with_tagname(GumboParser parser, ... );

has_an_element_in_select_scope ()
bool
has_an_element_in_select_scope(GumboParser parser, GumboTag tag);

has_an_element_in_specific_scope ()
bool
has_an_element_in_specific_scope(GumboParser parser, GumboVector expected, bool negate, ... );

has_an_element_in_table_scope ()
bool
has_an_element_in_table_scope(GumboParser parser, GumboTag tag);

has_node_in_scope ()
bool
has_node_in_scope   (GumboParser parser, const  node);

ignore_token ()
void
ignore_token        (GumboParser parser);

implicitly_close_tags ()
bool
implicitly_close_tags(GumboParser parser, GumboToken token, GumboTag target);

initialize_tag_buffer ()
void
initialize_tag_buffer(GumboParser parser);

insert_element ()
void
insert_element      (GumboParser parser, GumboNode node, bool is_reconstructing_formatting_elements);

insert_element_from_token ()
GumboNode
insert_element_from_token(GumboParser parser, GumboToken token);

insert_element_of_tag_type ()
GumboNode
insert_element_of_tag_type(GumboParser parser, GumboTag tag, GumboParseFlags reason);

insert_foreign_element ()
GumboNode
insert_foreign_element(GumboParser parser, GumboToken token, GumboNamespaceEnum tag_namespace);

insert_node ()
void
insert_node         (GumboParser parser, GumboNode parent, int index, GumboNode node);

insert_text_token ()
void
insert_text_token   (GumboParser parser, GumboToken token);

is_alpha ()
bool
is_alpha            (int c);

is_appropriate_end_tag ()
bool
is_appropriate_end_tag(GumboParser parser);

is_html_integration_point ()
bool
is_html_integration_point(const  node);

is_in_static_list ()
bool
is_in_static_list   (const char * needle, const  haystack, bool exact_match);

is_legal_attribute_char_next ()
bool
is_legal_attribute_char_next(Utf8Iterator input);

is_mathml_integration_point ()
bool
is_mathml_integration_point(const  node);

is_open_element ()
bool
is_open_element     (GumboParser parser, const  node);

is_special_node ()
bool
is_special_node     (const  node);

malloc_wrapper ()
void *
malloc_wrapper      (void * unused, size_t size);

mark_tag_state_as_empty ()
void
mark_tag_state_as_empty(GumboTagState tag_state);

maybe_add_doctype_error ()
bool
maybe_add_doctype_error(GumboParser parser, const  token);

maybe_add_invalid_named_reference ()
bool
maybe_add_invalid_named_reference(struct  parser, Utf8Iterator input);

maybe_emit_from_temporary_buffer ()
bool
maybe_emit_from_temporary_buffer(GumboParser parser, GumboToken output);

maybe_flush_text_node_buffer ()
void
maybe_flush_text_node_buffer(GumboParser parser);

maybe_implicitly_close_list_tag ()
void
maybe_implicitly_close_list_tag(GumboParser parser, GumboToken token, bool is_li);

maybe_implicitly_close_p_tag ()
bool
maybe_implicitly_close_p_tag(GumboParser parser, GumboToken token);

maybe_replace_codepoint ()
int
maybe_replace_codepoint(int codepoint);

maybe_resize_string_buffer ()
void
maybe_resize_string_buffer(struct  parser, size_t additional_chars, GumboStringBuffer buffer);

merge_attributes ()
void
merge_attributes    (GumboParser parser, GumboToken token, GumboNode node);

new_document_node ()
GumboNode
new_document_node   (GumboParser parser);

node_tag_in ()
bool
node_tag_in         (const  node, ... );

node_tag_is ()
bool
node_tag_is         (const  node, GumboTag tag);

output_init ()
void
output_init         (GumboParser parser);

parse_digit ()
int
parse_digit         (int c, bool allow_hex);

parser_state_destroy ()
void
parser_state_destroy(GumboParser parser);

parser_state_init ()
void
parser_state_init   (GumboParser parser);

pop_current_node ()
GumboNode
pop_current_node    (GumboParser parser);

pop_template_insertion_mode ()
void
pop_template_insertion_mode(GumboParser parser);

print_message ()
int
print_message       (GumboParser parser, GumboStringBuffer output, const char * format, ... );

print_tag_stack ()
void
print_tag_stack     (GumboParser parser, const  error, GumboStringBuffer output);

push_template_insertion_mode ()
void
push_template_insertion_mode(GumboParser parser, GumboInsertionMode mode);

read_char ()
void
read_char           (Utf8Iterator iter);

reconstruct_active_formatting_elements ()
void
reconstruct_active_formatting_elements(GumboParser parser);

record_end_of_element ()
void
record_end_of_element(GumboToken current_token, GumboElement element);

reinitialize_tag_buffer ()
void
reinitialize_tag_buffer(GumboParser parser);

remove_from_parent ()
void
remove_from_parent  (GumboParser parser, GumboNode node);

reset_insertion_mode_appropriately ()
void
reset_insertion_mode_appropriately(GumboParser parser);

reset_tag_buffer_start_point ()
void
reset_tag_buffer_start_point(GumboParser parser);

reset_token_start_point ()
void
reset_token_start_point(GumboTokenizerState tokenizer);

run_generic_parsing_algorithm ()
void
run_generic_parsing_algorithm(GumboParser parser, GumboToken token, GumboTokenizerEnum lexer_state);

set_frameset_not_ok ()
void
set_frameset_not_ok (GumboParser parser);

set_insertion_mode ()
void
set_insertion_mode  (GumboParser parser, GumboInsertionMode mode);

start_new_tag ()
void
start_new_tag       (GumboParser parser, bool is_start_tag);

tag_in ()
bool
tag_in              (const  token, bool is_start, ... );

tag_is ()
bool
tag_is              (const  token, bool is_start, GumboTag tag);

temporary_buffer_equals ()
bool
temporary_buffer_equals(GumboParser parser, const char * text);

token_has_attribute ()
bool
token_has_attribute (const  token, const char * name);

update_position ()
void
update_position     (Utf8Iterator iter);

utf8_is_invalid_code_point ()
bool
utf8_is_invalid_code_point(int c);

utf8iterator_current ()
int
utf8iterator_current(const  iter);

utf8iterator_fill_error_at_mark ()
void
utf8iterator_fill_error_at_mark(Utf8Iterator iter, struct  error);

utf8iterator_get_char_pointer ()
const char *
utf8iterator_get_char_pointer(const  iter);

utf8iterator_get_position ()
void
utf8iterator_get_position(const  iter, GumboSourcePosition output);

utf8iterator_init ()
void
utf8iterator_init   (struct  parser, const char * source, size_t source_length, Utf8Iterator iter);

utf8iterator_mark ()
void
utf8iterator_mark   (Utf8Iterator iter);

utf8iterator_maybe_consume_match ()
bool
utf8iterator_maybe_consume_match(Utf8Iterator iter, const char * prefix, size_t length, bool case_sensitive);

utf8iterator_next ()
void
utf8iterator_next   (Utf8Iterator iter);

utf8iterator_reset ()
void
utf8iterator_reset  (Utf8Iterator iter);