Synopsis
Gumbo - A pure-C HTML5 parser, adapted to be installable via clib.
API
acknowledge_self_closing_tag
add_codepoint_error
add_duplicate_attr_error
add_error
add_formatting_element
add_named_reference_error
add_no_digit_error
add_parse_error
adjust_foreign_attributes
adjust_mathml_attributes
adjust_svg_attributes
adoption_agency_algorithm
all_attributes_match
append_char_to_tag_buffer
append_char_to_temporary_buffer
append_comment_node
append_node
attribute_matches
attribute_matches_case_sensitive
clear_active_formatting_elements
clear_stack_to_table_body_context
clear_stack_to_table_context
clear_stack_to_table_row_context
clear_temporary_buffer
clone_node
close_current_cell
close_current_select
close_table
close_table_cell
compute_quirks_mode
consume_char_ref
consume_named_ref
consume_numeric_ref
copy_over_original_tag_text
copy_over_tag_buffer
count_formatting_elements_of_tag
create_element
create_element_from_token
create_node
destroy_node
doc_type_state_init
doctype_matches
emit_char
emit_char_ref
emit_comment
emit_current_char
emit_current_tag
emit_doctype
emit_eof
emit_replacement_char
emit_temporary_buffer
enlarge_vector_if_full
ensure_lowercase
find_last_anchor_index
find_last_newline
find_named_char_ref
find_next_newline
finish_attribute_name
finish_attribute_value
finish_doctype_public_id
finish_doctype_system_id
finish_parsing
finish_tag_name
finish_temporary_buffer
finish_token
foster_parent_element
free_wrapper
generate_implied_end_tags
get_appropriate_insertion_mode
get_char_token_type
get_current_node
get_current_template_insertion_mode
get_document_node
gumbo_add_error
gumbo_caret_diagnostic_to_string
gumbo_copy_stringz
gumbo_debug
gumbo_destroy_attribute
gumbo_destroy_errors
gumbo_destroy_node
gumbo_destroy_output
gumbo_error_destroy
gumbo_error_to_string
gumbo_get_attribute
gumbo_init_errors
gumbo_lex
gumbo_normalize_svg_tagname
gumbo_normalized_tagname
gumbo_parse
gumbo_parse_with_options
gumbo_parser_allocate
gumbo_parser_deallocate
gumbo_print_caret_diagnostic
gumbo_string_buffer_append_codepoint
gumbo_string_buffer_append_string
gumbo_string_buffer_destroy
gumbo_string_buffer_init
gumbo_string_buffer_reserve
gumbo_string_buffer_to_string
gumbo_string_copy
gumbo_string_equals
gumbo_string_equals_ignore_case
gumbo_tag_enum
gumbo_tag_from_original_text
gumbo_token_destroy
gumbo_tokenizer_set_is_current_node_foreign
gumbo_tokenizer_set_state
gumbo_tokenizer_state_destroy
gumbo_tokenizer_state_init
gumbo_vector_add
gumbo_vector_destroy
gumbo_vector_index_of
gumbo_vector_init
gumbo_vector_insert_at
gumbo_vector_pop
gumbo_vector_remove
gumbo_vector_remove_at
handle_after_after_body
handle_after_after_frameset
handle_after_attr_name_state
handle_after_attr_value_quoted_state
handle_after_body
handle_after_doctype_name_state
handle_after_doctype_public_id_state
handle_after_doctype_public_keyword_state
handle_after_doctype_system_id_state
handle_after_doctype_system_keyword_state
handle_after_frameset
handle_after_head
handle_attr_name_state
handle_attr_value_double_quoted_state
handle_attr_value_single_quoted_state
handle_attr_value_unquoted_state
handle_before_attr_name_state
handle_before_attr_value_state
handle_before_doctype_name_state
handle_before_doctype_public_id_state
handle_before_doctype_system_id_state
handle_before_head
handle_before_html
handle_between_doctype_public_system_id_state
handle_bogus_comment_state
handle_bogus_doctype_state
handle_cdata_state
handle_char_ref_in_attr_value_state
handle_char_ref_in_data_state
handle_char_ref_in_rcdata_state
handle_comment_end_bang_state
handle_comment_end_dash_state
handle_comment_end_state
handle_comment_start_dash_state
handle_comment_start_state
handle_comment_state
handle_data_state
handle_doctype_name_state
handle_doctype_public_id_double_quoted_state
handle_doctype_public_id_single_quoted_state
handle_doctype_state
handle_doctype_system_id_double_quoted_state
handle_doctype_system_id_single_quoted_state
handle_end_tag_open_state
handle_html_content
handle_in_body
handle_in_caption
handle_in_cell
handle_in_column_group
handle_in_foreign_content
handle_in_frameset
handle_in_head
handle_in_head_noscript
handle_in_row
handle_in_select
handle_in_select_in_table
handle_in_table
handle_in_table_body
handle_in_table_text
handle_in_template
handle_initial
handle_markup_declaration_state
handle_parser_error
handle_plaintext_state
handle_rawtext_end_tag_name_state
handle_rawtext_end_tag_open_state
handle_rawtext_lt_state
handle_rawtext_state
handle_rcdata_end_tag_name_state
handle_rcdata_end_tag_open_state
handle_rcdata_lt_state
handle_rcdata_state
handle_script_double_escaped_dash_dash_state
handle_script_double_escaped_dash_state
handle_script_double_escaped_end_state
handle_script_double_escaped_lt_state
handle_script_double_escaped_start_state
handle_script_double_escaped_state
handle_script_end_tag_name_state
handle_script_end_tag_open_state
handle_script_escaped_dash_dash_state
handle_script_escaped_dash_state
handle_script_escaped_end_tag_name_state
handle_script_escaped_end_tag_open_state
handle_script_escaped_lt_state
handle_script_escaped_start_dash_state
handle_script_escaped_start_state
handle_script_escaped_state
handle_script_lt_state
handle_script_state
handle_self_closing_start_tag_state
handle_tag_name_state
handle_tag_open_state
handle_text
handle_token
has_an_element_in_button_scope
has_an_element_in_list_scope
has_an_element_in_scope
has_an_element_in_scope_with_tagname
has_an_element_in_select_scope
has_an_element_in_specific_scope
has_an_element_in_table_scope
has_node_in_scope
ignore_token
implicitly_close_tags
initialize_tag_buffer
insert_element
insert_element_from_token
insert_element_of_tag_type
insert_foreign_element
insert_node
insert_text_token
is_alpha
is_appropriate_end_tag
is_html_integration_point
is_in_static_list
is_legal_attribute_char_next
is_mathml_integration_point
is_open_element
is_special_node
malloc_wrapper
mark_tag_state_as_empty
maybe_add_doctype_error
maybe_add_invalid_named_reference
maybe_emit_from_temporary_buffer
maybe_flush_text_node_buffer
maybe_implicitly_close_list_tag
maybe_implicitly_close_p_tag
maybe_replace_codepoint
maybe_resize_string_buffer
merge_attributes
new_document_node
node_tag_in
node_tag_is
output_init
parse_digit
parser_state_destroy
parser_state_init
pop_current_node
pop_template_insertion_mode
print_message
print_tag_stack
push_template_insertion_mode
read_char
reconstruct_active_formatting_elements
record_end_of_element
reinitialize_tag_buffer
remove_from_parent
reset_insertion_mode_appropriately
reset_tag_buffer_start_point
reset_token_start_point
run_generic_parsing_algorithm
set_frameset_not_ok
set_insertion_mode
start_new_tag
tag_in
tag_is
temporary_buffer_equals
token_has_attribute
update_position
utf8_is_invalid_code_point
utf8iterator_current
utf8iterator_fill_error_at_mark
utf8iterator_get_char_pointer
utf8iterator_get_position
utf8iterator_init
utf8iterator_mark
utf8iterator_maybe_consume_match
utf8iterator_next
utf8iterator_reset
Details
- _NamespacedAttributeReplacement
- _ReplacementEntry
- _TextNodeBufferState
- CharReplacement
- GumboAttribute
-
The namespace for the attribute. This will usually be GUMBO_ATTR_NAMESPACE_NONE, but some XLink/XMLNS/XML attributes take special values, per the "adjust foreign attributes" step of the HTML5 tree-construction specification.
The name of the attribute. This is in a freshly-allocated buffer to deal with case-normalization, and is null-terminated.
The original text of the attribute name, as a pointer into the original source buffer.
The value of the attribute. This is in a freshly-allocated buffer to deal with unescaping, and is null-terminated. It does not include any quotes that surround the attribute. If the attribute has no value (for example, 'selected' on a checkbox), this will be an empty string.
The original text of the value of the attribute. This points into the original source buffer. It includes any quotes that surround the attribute, and you can look at original_value.data[0] and original_value.data[original_value.length - 1] to determine what the quote characters were. If the attribute has no value, this will be a 0-length string.
The starting position of the attribute name.
The ending position of the attribute name. This is not always derivable from the starting position of the value because of the possibility of whitespace around the = sign.
The starting position of the attribute value.
The ending position of the attribute value.
A struct representing a single attribute on an HTML tag. This is a name-value pair, but also includes information about source locations and original source text.
- GumboDocument
-
An array of GumboNodes, containing the children of this element. This will normally consist of the <html> element and any comment nodes found. Pointers are owned.
Whether or not the document is in QuirksMode, as determined by the values in the GumboTokenDocType template.
Information specific to document nodes.
- GumboElement
-
An array of GumboNodes, containing the children of this element. Pointers are owned.
The GumboTag enum for this element.
The GumboNamespaceEnum for this element.
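A GumboStringPiece pointing to the original tag text for this element, pointing directly into the source buffer. If the tag was inserted algorithmically (for example, <head> or <tbody> insertion), this will be a zero-length string.
A GumboStringPiece pointing to the original end tag text for this element. If the end tag was not present in the source (for example, because it was implied or the tag is self-closing), this will be a zero-length string.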
The source position for the start of the start tag.
The source position for the start of the end tag.
An array of GumboAttributes, containing the attributes for this tag in the order that they were parsed. Pointers are owned.
The struct used to represent all HTML elements. This contains information about the tag, attributes, and child nodes.
- GumboInternalDuplicateAttrError
- GumboInternalError
- GumboInternalNode
-
The type of node that this is.
Pointer back to parent node. Not owned.
The index within the parent's children vector of this node.
A bitvector of flags containing information about why this element was inserted into the parse tree, including a variety of special parse situations.
The actual node data.
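A supertype for GumboElement and GumboText, so that one generic type can be stored in lists of children and cast to the appropriate subtype as necessary.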
- GumboInternalOptions
-
A memory allocator function. Default: malloc.
A memory deallocator function. Default: free.
An opaque object that's passed in as the first argument to all callbacks used by this library. Default: NULL.
The tab-stop size, for computing positions in source code that uses tabs. Default: 8.
Whether or not to stop parsing when the first error is encountered. Default: false.
The maximum number of errors before the parser stops recording them. This is provided so that if the page is totally borked, we don't completely fill up the errors vector and exhaust memory with useless redundant errors. Set to -1 to disable the limit. Default: -1.
Input struct containing configuration options for the parser. These let you specify alternate memory managers, provide different error handling, etc. Use kGumboDefaultOptions for sensible defaults, and only set what you need.
- GumboInternalOutput
-
Pointer to the document node. This is a GumboNode of type NODE_DOCUMENT that contains the entire document as its child.
Pointer to the root node. This is the <html> tag that forms the root of the document.
A list of errors that occurred during the parse. NOTE: In version 1.0 of this library, the API for errors hasn't been fully fleshed out and may change in the future. For this reason, the GumboError header isn't part of the public API. Contact us if you need errors reported so we can work out something appropriate for your use-case.
The output struct containing the results of the parse.
- GumboInternalParser
- GumboInternalParserError
- GumboInternalParserState
- GumboInternalTagState
- GumboInternalToken
- GumboInternalTokenDocType
- GumboInternalTokenizerError
- GumboInternalTokenizerState
- GumboInternalTokenStartTag
- GumboInternalUtf8Iterator
- GumboSourcePosition
-
A struct representing a character position within the original text buffer. Line and column numbers are 1-based and offsets are 0-based, which matches how most editors and command-line tools work. Also, columns measure positions in terms of characters while offsets measure by bytes; this is because the offset field is often used to pull out a particular region of text (which in most languages that bind to C implies pointer arithmetic on a buffer of bytes), while the column field is often used to reference a particular column on a printable display, which nowadays is usually UTF-8.
- GumboStringBuffer
- GumboStringPiece
-
A pointer to the beginning of the string. NULL iff length == 0.
The length of the string fragment, in bytes. May be zero.
A struct representing a string or part of a string. Strings within the parser are represented by a char* and a length; the char* points into an existing data buffer owned by some other code (often the original input). GumboStringPieces are assumed (by convention) to be immutable, because they may share data. Use GumboStringBuffer if you need to construct a string.
- GumboText
-
The text of this node, after entities have been parsed and decoded. For comment/cdata nodes, this does not include the comment delimiters.
The original text of this node, as a pointer into the original buffer. For comment/cdata nodes, this includes the comment delimiters.
The starting position of this node. This corresponds to the position of original_text, before entities are decoded.
The struct used to represent TEXT, CDATA, COMMENT, and WHITESPACE elements. This contains just a block of text and its position.
- GumboVector
-
Data elements. This points to a dynamically-allocated array of capacity elements, each a void* to the element itself.
Number of elements currently in the vector.
Current array capacity.
A simple vector implementation. This stores a pointer to a data array and a length. All elements are stored as void*; client code must cast to the appropriate type. Overflows upon addition result in reallocation of the data array, with the size doubling to maintain O(1) amortized cost. Elements can be removed with gumbo_vector_pop, gumbo_vector_remove, and gumbo_vector_remove_at. Iteration can be done through inspecting the structure directly in a for-loop.
- NamedCharRef
- OneOrTwoCodepoints
- abandon_current_tag ()
-
void abandon_current_tag (GumboParser parser);
- acknowledge_self_closing_tag ()
-
void acknowledge_self_closing_tag(GumboParser parser);
- add_codepoint_error ()
-
void add_codepoint_error (struct parser, Utf8Iterator input, GumboErrorType type, int codepoint);
- add_duplicate_attr_error ()
-
void add_duplicate_attr_error(GumboParser parser, const char * attr_name, int original_index, int new_index);
- add_error ()
-
void add_error (Utf8Iterator iter, GumboErrorType type);
- add_formatting_element ()
-
void add_formatting_element(GumboParser parser, const node);
- add_named_reference_error ()
-
void add_named_reference_error(struct parser, Utf8Iterator input, GumboErrorType type, GumboStringPiece text);
- add_no_digit_error ()
-
void add_no_digit_error (struct parser, Utf8Iterator input);
- add_parse_error ()
-
void add_parse_error (GumboParser parser, GumboErrorType type);
- adjust_foreign_attributes ()
-
void adjust_foreign_attributes(GumboParser parser, GumboToken token);
- adjust_mathml_attributes ()
-
void adjust_mathml_attributes(GumboParser parser, GumboToken token);
- adjust_svg_attributes ()
-
void adjust_svg_attributes(GumboParser parser, GumboToken token);
- adoption_agency_algorithm ()
-
bool adoption_agency_algorithm(GumboParser parser, GumboToken token, GumboTag closing_tag);
- all_attributes_match ()
-
bool all_attributes_match(const attr1, const attr2);
- append_char_to_tag_buffer ()
-
void append_char_to_tag_buffer(GumboParser parser, int codepoint, bool reinitilize_position_on_first);
- append_char_to_temporary_buffer ()
-
void append_char_to_temporary_buffer(GumboParser parser, int codepoint);
- append_comment_node ()
-
void append_comment_node (GumboParser parser, GumboNode node, const token);
- append_node ()
-
void append_node (GumboParser parser, GumboNode parent, GumboNode node);
- attribute_matches ()
-
bool attribute_matches (const attributes, const char * name, const char * value);
- attribute_matches_case_sensitive ()
-
bool attribute_matches_case_sensitive(const attributes, const char * name, const char * value);
- clear_active_formatting_elements ()
-
void clear_active_formatting_elements(GumboParser parser);
- clear_stack_to_table_body_context ()
-
void clear_stack_to_table_body_context(GumboParser parser);
- clear_stack_to_table_context ()
-
void clear_stack_to_table_context(GumboParser parser);
- clear_stack_to_table_row_context ()
-
void clear_stack_to_table_row_context(GumboParser parser);
- clear_temporary_buffer ()
-
void clear_temporary_buffer(GumboParser parser);
- clone_node ()
-
GumboNode clone_node (GumboParser parser, const node, GumboParseFlags reason);
- close_current_cell ()
-
bool close_current_cell (GumboParser parser, const token);
- close_current_select ()
-
void close_current_select(GumboParser parser);
- close_table ()
-
bool close_table (GumboParser parser);
- close_table_cell ()
-
bool close_table_cell (GumboParser parser, const token, GumboTag cell_tag);
- compute_quirks_mode ()
-
GumboQuirksModeEnum compute_quirks_mode (const doctype);
- consume_char_ref ()
-
bool consume_char_ref (struct parser, struct input, int additional_allowed_char, bool is_in_attribute, OneOrTwoCodepoints output);
- consume_named_ref ()
-
bool consume_named_ref (struct parser, Utf8Iterator input, bool is_in_attribute, OneOrTwoCodepoints output);
- consume_numeric_ref ()
-
bool consume_numeric_ref (struct parser, Utf8Iterator input, int * output);
- copy_over_original_tag_text ()
-
void copy_over_original_tag_text(GumboParser parser, GumboStringPiece original_text, GumboSourcePosition start_pos, GumboSourcePosition end_pos);
- copy_over_tag_buffer ()
-
void copy_over_tag_buffer(GumboParser parser, const char ** output);
- count_formatting_elements_of_tag ()
-
int count_formatting_elements_of_tag(GumboParser parser, const desired_node, int * earliest_matching_index);
- create_element ()
-
GumboNode create_element (GumboParser parser, GumboTag tag);
- create_element_from_token ()
-
GumboNode create_element_from_token(GumboParser parser, GumboToken token, GumboNamespaceEnum tag_namespace);
- create_node ()
-
GumboNode create_node (GumboParser parser, GumboNodeType type);
- destroy_node ()
-
void destroy_node (GumboParser parser, GumboNode node);
- doc_type_state_init ()
-
void doc_type_state_init (GumboParser parser);
- doctype_matches ()
-
bool doctype_matches (const doctype, const public_id, const system_id, bool allow_missing_system_id);
- emit_char ()
-
void emit_char (GumboParser parser, int c, GumboToken output);
- emit_char_ref ()
-
StateResult emit_char_ref (GumboParser parser, int additional_allowed_char, bool is_in_attribute, GumboToken output);
- emit_comment ()
-
StateResult emit_comment (GumboParser parser, GumboToken output);
- emit_current_char ()
-
bool emit_current_char (GumboParser parser, GumboToken output);
- emit_current_tag ()
-
StateResult emit_current_tag (GumboParser parser, GumboToken output);
- emit_doctype ()
-
void emit_doctype (GumboParser parser, GumboToken output);
- emit_eof ()
-
StateResult emit_eof (GumboParser parser, GumboToken output);
- emit_replacement_char ()
-
StateResult emit_replacement_char(GumboParser parser, GumboToken output);
- emit_temporary_buffer ()
-
bool emit_temporary_buffer(GumboParser parser, GumboToken output);
- enlarge_vector_if_full ()
-
void enlarge_vector_if_full(struct parser, GumboVector vector);
- ensure_lowercase ()
-
int ensure_lowercase (int c);
- find_last_anchor_index ()
-
bool find_last_anchor_index(GumboParser parser, int * anchor_index);
- find_last_newline ()
-
const char * find_last_newline (const char * original_text, const char * error_location);
- find_named_char_ref ()
-
const find_named_char_ref (Utf8Iterator input);
- find_next_newline ()
-
const char * find_next_newline (const char * original_text, const char * error_location);
- finish_attribute_name ()
-
bool finish_attribute_name(GumboParser parser);
- finish_attribute_value ()
-
void finish_attribute_value(GumboParser parser);
- finish_doctype_public_id ()
-
void finish_doctype_public_id(GumboParser parser);
- finish_doctype_system_id ()
-
void finish_doctype_system_id(GumboParser parser);
- finish_parsing ()
-
void finish_parsing (GumboParser parser);
- finish_tag_name ()
-
void finish_tag_name (GumboParser parser);
- finish_temporary_buffer ()
-
void finish_temporary_buffer(GumboParser parser, const char ** output);
- finish_token ()
-
void finish_token (GumboParser parser, GumboToken token);
- foster_parent_element ()
-
void foster_parent_element(GumboParser parser, GumboNode node);
- free_wrapper ()
-
void free_wrapper (void * unused, void * ptr);
- generate_implied_end_tags ()
-
void generate_implied_end_tags(GumboParser parser, GumboTag exception);
- get_appropriate_insertion_mode ()
-
GumboInsertionMode get_appropriate_insertion_mode(const node, bool is_last);
- get_char_token_type ()
-
GumboTokenType get_char_token_type (int c);
- get_current_node ()
-
GumboNode get_current_node (GumboParser parser);
- get_current_template_insertion_mode ()
-
GumboInsertionMode get_current_template_insertion_mode(GumboParser parser);
- get_document_node ()
-
GumboNode get_document_node (GumboParser parser);
- gumbo_add_error ()
-
GumboError gumbo_add_error (struct parser);
- gumbo_caret_diagnostic_to_string ()
-
void gumbo_caret_diagnostic_to_string(struct parser, const error, const char * source_text, GumboStringBuffer output);
- gumbo_copy_stringz ()
-
char * gumbo_copy_stringz (struct parser, const char * str);
- gumbo_debug ()
-
void gumbo_debug (const char * format, ... );
- gumbo_destroy_attribute ()
-
void gumbo_destroy_attribute(struct parser, GumboAttribute attribute);
- gumbo_destroy_errors ()
-
void gumbo_destroy_errors(struct errors);
- gumbo_destroy_node ()
-
void gumbo_destroy_node (GumboOptions options, GumboNode node);
- gumbo_destroy_output ()
-
void gumbo_destroy_output(const GumboOptions * options, GumboOutput * output);
Release the memory used for the parse tree & parse errors.
- gumbo_error_destroy ()
-
void gumbo_error_destroy (struct parser, GumboError error);
- gumbo_error_to_string ()
-
void gumbo_error_to_string(struct parser, const error, GumboStringBuffer output);
- gumbo_get_attribute ()
-
GumboAttribute * gumbo_get_attribute (const GumboVector * attrs, const char * name);
Given a vector of GumboAttributes, look up the one with the specified name and return it, or NULL if no such attribute exists. This uses a case-insensitive match, as HTML is case-insensitive.
- gumbo_init_errors ()
-
void gumbo_init_errors (struct errors);
- gumbo_lex ()
-
bool gumbo_lex (struct parser, GumboToken output);
- gumbo_normalize_svg_tagname ()
-
const char * gumbo_normalize_svg_tagname(const GumboStringPiece * tagname);
Fixes the case of SVG elements that are not all lowercase.
- gumbo_normalized_tagname ()
-
const char * gumbo_normalized_tagname(GumboTag tag);
Returns the normalized (usually all-lowercased, except for foreign content) tag name for a GumboTag enum. Return value is static data owned by the library.
- gumbo_parse ()
-
GumboOutput * gumbo_parse (const char * buffer);
Parses a buffer of UTF-8 text into a GumboNode parse tree. The buffer must live at least as long as the parse tree, as some fields (e.g. original_text) point directly into the original buffer.
This doesn't support buffers longer than 4 gigabytes.
- gumbo_parse_with_options ()
-
GumboOutput * gumbo_parse_with_options(const GumboOptions * options, const char * buffer, size_t buffer_length);
Extended version of gumbo_parse that takes an explicit options structure, buffer, and length.
- gumbo_parser_allocate ()
-
void * gumbo_parser_allocate(struct parser, size_t num_bytes);
- gumbo_parser_deallocate ()
-
void gumbo_parser_deallocate(struct parser, void * ptr);
- gumbo_print_caret_diagnostic ()
-
void gumbo_print_caret_diagnostic(struct parser, const error, const char * source_text);
- gumbo_string_buffer_append_codepoint ()
-
void gumbo_string_buffer_append_codepoint(struct parser, int c, GumboStringBuffer output);
- gumbo_string_buffer_append_string ()
-
void gumbo_string_buffer_append_string(struct parser, GumboStringPiece str, GumboStringBuffer output);
- gumbo_string_buffer_destroy ()
-
void gumbo_string_buffer_destroy(struct parser, GumboStringBuffer buffer);
- gumbo_string_buffer_init ()
-
void gumbo_string_buffer_init(struct parser, GumboStringBuffer output);
- gumbo_string_buffer_reserve ()
-
void gumbo_string_buffer_reserve(struct parser, size_t min_capacity, GumboStringBuffer output);
- gumbo_string_buffer_to_string ()
-
char * gumbo_string_buffer_to_string(struct parser, GumboStringBuffer input);
- gumbo_string_copy ()
-
void gumbo_string_copy (struct parser, GumboStringPiece dest, const source);
- gumbo_string_equals ()
-
bool gumbo_string_equals (const GumboStringPiece * str1, const GumboStringPiece * str2);
Compares two GumboStringPieces, and returns true if they're equal or false otherwise.
- gumbo_string_equals_ignore_case ()
-
bool gumbo_string_equals_ignore_case(const GumboStringPiece * str1, const GumboStringPiece * str2);
Compares two GumboStringPieces ignoring case, and returns true if they're equal or false otherwise.
- gumbo_tag_enum ()
-
GumboTag gumbo_tag_enum (const char * tagname);
Converts a tag name string (which may be in upper or mixed case) to a tag enum.
- gumbo_tag_from_original_text ()
-
void gumbo_tag_from_original_text(GumboStringPiece * text);
Extracts the tag name from the original_text field of an element or token by stripping off </> characters and attributes and adjusting the passed-in GumboStringPiece appropriately.
- gumbo_token_destroy ()
-
void gumbo_token_destroy (struct parser, GumboToken token);
- gumbo_tokenizer_set_is_current_node_foreign ()
-
void gumbo_tokenizer_set_is_current_node_foreign(struct parser, bool is_foreign);
- gumbo_tokenizer_set_state ()
-
void gumbo_tokenizer_set_state(struct parser, GumboTokenizerEnum state);
- gumbo_tokenizer_state_destroy ()
-
void gumbo_tokenizer_state_destroy(struct parser);
- gumbo_tokenizer_state_init ()
-
void gumbo_tokenizer_state_init(struct parser, const char * text, size_t text_length);
- gumbo_vector_add ()
-
void gumbo_vector_add (struct parser, void * element, GumboVector vector);
- gumbo_vector_destroy ()
-
void gumbo_vector_destroy(struct parser, GumboVector vector);
- gumbo_vector_index_of ()
-
int gumbo_vector_index_of(GumboVector vector, void * element);
Returns the first index at which an element appears in this vector (testing by pointer equality), or -1 if it never does.
- gumbo_vector_init ()
-
void gumbo_vector_init (struct parser, size_t initial_capacity, GumboVector vector);
- gumbo_vector_insert_at ()
-
void gumbo_vector_insert_at(struct parser, void * element, int index, GumboVector vector);
- gumbo_vector_pop ()
-
void * gumbo_vector_pop (struct parser, GumboVector vector);
- gumbo_vector_remove ()
-
void gumbo_vector_remove (struct parser, void * element, GumboVector vector);
- gumbo_vector_remove_at ()
-
void * gumbo_vector_remove_at(struct parser, int index, GumboVector vector);
- handle_after_after_body ()
-
bool handle_after_after_body(GumboParser parser, GumboToken token);
- handle_after_after_frameset ()
-
bool handle_after_after_frameset(GumboParser parser, GumboToken token);
- handle_after_attr_name_state ()
-
StateResult handle_after_attr_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_after_attr_value_quoted_state ()
-
StateResult handle_after_attr_value_quoted_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_after_body ()
-
bool handle_after_body (GumboParser parser, GumboToken token);
- handle_after_doctype_name_state ()
-
StateResult handle_after_doctype_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_after_doctype_public_id_state ()
-
StateResult handle_after_doctype_public_id_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_after_doctype_public_keyword_state ()
-
StateResult handle_after_doctype_public_keyword_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_after_doctype_system_id_state ()
-
StateResult handle_after_doctype_system_id_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_after_doctype_system_keyword_state ()
-
StateResult handle_after_doctype_system_keyword_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_after_frameset ()
-
bool handle_after_frameset(GumboParser parser, GumboToken token);
- handle_after_head ()
-
bool handle_after_head (GumboParser parser, GumboToken token);
- handle_attr_name_state ()
-
StateResult handle_attr_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_attr_value_double_quoted_state ()
-
StateResult handle_attr_value_double_quoted_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_attr_value_single_quoted_state ()
-
StateResult handle_attr_value_single_quoted_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_attr_value_unquoted_state ()
-
StateResult handle_attr_value_unquoted_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_before_attr_name_state ()
-
StateResult handle_before_attr_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_before_attr_value_state ()
-
StateResult handle_before_attr_value_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_before_doctype_name_state ()
-
StateResult handle_before_doctype_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_before_doctype_public_id_state ()
-
StateResult handle_before_doctype_public_id_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_before_doctype_system_id_state ()
-
StateResult handle_before_doctype_system_id_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_before_head ()
-
bool handle_before_head (GumboParser parser, GumboToken token);
- handle_before_html ()
-
bool handle_before_html (GumboParser parser, GumboToken token);
- handle_between_doctype_public_system_id_state ()
-
StateResult handle_between_doctype_public_system_id_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_bogus_comment_state ()
-
StateResult handle_bogus_comment_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_bogus_doctype_state ()
-
StateResult handle_bogus_doctype_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_cdata_state ()
-
StateResult handle_cdata_state (GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_char_ref_in_attr_value_state ()
-
StateResult handle_char_ref_in_attr_value_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_char_ref_in_data_state ()
-
StateResult handle_char_ref_in_data_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_char_ref_in_rcdata_state ()
-
StateResult handle_char_ref_in_rcdata_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_comment_end_bang_state ()
-
StateResult handle_comment_end_bang_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_comment_end_dash_state ()
-
StateResult handle_comment_end_dash_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_comment_end_state ()
-
StateResult handle_comment_end_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_comment_start_dash_state ()
-
StateResult handle_comment_start_dash_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_comment_start_state ()
-
StateResult handle_comment_start_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_comment_state ()
-
StateResult handle_comment_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_data_state ()
-
StateResult handle_data_state (GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_doctype_name_state ()
-
StateResult handle_doctype_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_doctype_public_id_double_quoted_state ()
-
StateResult handle_doctype_public_id_double_quoted_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_doctype_public_id_single_quoted_state ()
-
StateResult handle_doctype_public_id_single_quoted_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_doctype_state ()
-
StateResult handle_doctype_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_doctype_system_id_double_quoted_state ()
-
StateResult handle_doctype_system_id_double_quoted_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_doctype_system_id_single_quoted_state ()
-
StateResult handle_doctype_system_id_single_quoted_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_end_tag_open_state ()
-
StateResult handle_end_tag_open_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_html_content ()
-
bool handle_html_content (GumboParser parser, GumboToken token);
- handle_in_body ()
-
bool handle_in_body (GumboParser parser, GumboToken token);
- handle_in_caption ()
-
bool handle_in_caption (GumboParser parser, GumboToken token);
- handle_in_cell ()
-
bool handle_in_cell (GumboParser parser, GumboToken token);
- handle_in_column_group ()
-
bool handle_in_column_group(GumboParser parser, GumboToken token);
- handle_in_foreign_content ()
-
bool handle_in_foreign_content(GumboParser parser, GumboToken token);
- handle_in_frameset ()
-
bool handle_in_frameset (GumboParser parser, GumboToken token);
- handle_in_head ()
-
bool handle_in_head (GumboParser parser, GumboToken token);
- handle_in_head_noscript ()
-
bool handle_in_head_noscript(GumboParser parser, GumboToken token);
- handle_in_row ()
-
bool handle_in_row (GumboParser parser, GumboToken token);
- handle_in_select ()
-
bool handle_in_select (GumboParser parser, GumboToken token);
- handle_in_select_in_table ()
-
bool handle_in_select_in_table(GumboParser parser, GumboToken token);
- handle_in_table ()
-
bool handle_in_table (GumboParser parser, GumboToken token);
- handle_in_table_body ()
-
bool handle_in_table_body(GumboParser parser, GumboToken token);
- handle_in_table_text ()
-
bool handle_in_table_text(GumboParser parser, GumboToken token);
- handle_in_template ()
-
bool handle_in_template (GumboParser parser, GumboToken token);
- handle_initial ()
-
bool handle_initial (GumboParser parser, GumboToken token);
- handle_markup_declaration_state ()
-
StateResult handle_markup_declaration_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_parser_error ()
-
void handle_parser_error (GumboParser parser, const GumboParserError * error, GumboStringBuffer output);
- handle_plaintext_state ()
-
StateResult handle_plaintext_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_rawtext_end_tag_name_state ()
-
StateResult handle_rawtext_end_tag_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_rawtext_end_tag_open_state ()
-
StateResult handle_rawtext_end_tag_open_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_rawtext_lt_state ()
-
StateResult handle_rawtext_lt_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_rawtext_state ()
-
StateResult handle_rawtext_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_rcdata_end_tag_name_state ()
-
StateResult handle_rcdata_end_tag_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_rcdata_end_tag_open_state ()
-
StateResult handle_rcdata_end_tag_open_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_rcdata_lt_state ()
-
StateResult handle_rcdata_lt_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_rcdata_state ()
-
StateResult handle_rcdata_state (GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_script_double_escaped_dash_dash_state ()
-
StateResult handle_script_double_escaped_dash_dash_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_script_double_escaped_dash_state ()
-
StateResult handle_script_double_escaped_dash_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_script_double_escaped_end_state ()
-
StateResult handle_script_double_escaped_end_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_script_double_escaped_lt_state ()
-
StateResult handle_script_double_escaped_lt_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_script_double_escaped_start_state ()
-
StateResult handle_script_double_escaped_start_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_script_double_escaped_state ()
-
StateResult handle_script_double_escaped_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_script_end_tag_name_state ()
-
StateResult handle_script_end_tag_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_script_end_tag_open_state ()
-
StateResult handle_script_end_tag_open_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_script_escaped_dash_dash_state ()
-
StateResult handle_script_escaped_dash_dash_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_script_escaped_dash_state ()
-
StateResult handle_script_escaped_dash_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_script_escaped_end_tag_name_state ()
-
StateResult handle_script_escaped_end_tag_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_script_escaped_end_tag_open_state ()
-
StateResult handle_script_escaped_end_tag_open_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_script_escaped_lt_state ()
-
StateResult handle_script_escaped_lt_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_script_escaped_start_dash_state ()
-
StateResult handle_script_escaped_start_dash_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_script_escaped_start_state ()
-
StateResult handle_script_escaped_start_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_script_escaped_state ()
-
StateResult handle_script_escaped_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_script_lt_state ()
-
StateResult handle_script_lt_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_script_state ()
-
StateResult handle_script_state (GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_self_closing_start_tag_state ()
-
StateResult handle_self_closing_start_tag_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_tag_name_state ()
-
StateResult handle_tag_name_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_tag_open_state ()
-
StateResult handle_tag_open_state(GumboParser parser, GumboTokenizerState tokenizer, int c, GumboToken output);
- handle_text ()
-
bool handle_text (GumboParser parser, GumboToken token);
- handle_token ()
-
bool handle_token (GumboParser parser, GumboToken token);
- has_an_element_in_button_scope ()
-
bool has_an_element_in_button_scope(GumboParser parser, GumboTag tag);
- has_an_element_in_list_scope ()
-
bool has_an_element_in_list_scope(GumboParser parser, GumboTag tag);
- has_an_element_in_scope ()
-
bool has_an_element_in_scope(GumboParser parser, GumboTag tag);
- has_an_element_in_scope_with_tagname ()
-
bool has_an_element_in_scope_with_tagname(GumboParser parser, ... );
- has_an_element_in_select_scope ()
-
bool has_an_element_in_select_scope(GumboParser parser, GumboTag tag);
- has_an_element_in_specific_scope ()
-
bool has_an_element_in_specific_scope(GumboParser parser, GumboVector expected, bool negate, ... );
- has_an_element_in_table_scope ()
-
bool has_an_element_in_table_scope(GumboParser parser, GumboTag tag);
- has_node_in_scope ()
-
bool has_node_in_scope (GumboParser parser, const GumboNode * node);
- ignore_token ()
-
void ignore_token (GumboParser parser);
- implicitly_close_tags ()
-
bool implicitly_close_tags(GumboParser parser, GumboToken token, GumboTag target);
- initialize_tag_buffer ()
-
void initialize_tag_buffer(GumboParser parser);
- insert_element ()
-
void insert_element (GumboParser parser, GumboNode node, bool is_reconstructing_formatting_elements);
- insert_element_from_token ()
-
GumboNode insert_element_from_token(GumboParser parser, GumboToken token);
- insert_element_of_tag_type ()
-
GumboNode insert_element_of_tag_type(GumboParser parser, GumboTag tag, GumboParseFlags reason);
- insert_foreign_element ()
-
GumboNode insert_foreign_element(GumboParser parser, GumboToken token, GumboNamespaceEnum tag_namespace);
- insert_node ()
-
void insert_node (GumboParser parser, GumboNode parent, int index, GumboNode node);
- insert_text_token ()
-
void insert_text_token (GumboParser parser, GumboToken token);
- is_alpha ()
-
bool is_alpha (int c);
- is_appropriate_end_tag ()
-
bool is_appropriate_end_tag(GumboParser parser);
- is_html_integration_point ()
-
bool is_html_integration_point(const GumboNode * node);
- is_in_static_list ()
-
bool is_in_static_list (const char * needle, const GumboStringPiece * haystack, bool exact_match);
- is_legal_attribute_char_next ()
-
bool is_legal_attribute_char_next(Utf8Iterator input);
- is_mathml_integration_point ()
-
bool is_mathml_integration_point(const GumboNode * node);
- is_open_element ()
-
bool is_open_element (GumboParser parser, const GumboNode * node);
- is_special_node ()
-
bool is_special_node (const GumboNode * node);
- malloc_wrapper ()
-
void * malloc_wrapper (void * unused, size_t size);
- mark_tag_state_as_empty ()
-
void mark_tag_state_as_empty(GumboTagState tag_state);
- maybe_add_doctype_error ()
-
bool maybe_add_doctype_error(GumboParser parser, const GumboToken * token);
- maybe_add_invalid_named_reference ()
-
bool maybe_add_invalid_named_reference(struct GumboInternalParser * parser, Utf8Iterator input);
- maybe_emit_from_temporary_buffer ()
-
bool maybe_emit_from_temporary_buffer(GumboParser parser, GumboToken output);
- maybe_flush_text_node_buffer ()
-
void maybe_flush_text_node_buffer(GumboParser parser);
- maybe_implicitly_close_list_tag ()
-
void maybe_implicitly_close_list_tag(GumboParser parser, GumboToken token, bool is_li);
- maybe_implicitly_close_p_tag ()
-
bool maybe_implicitly_close_p_tag(GumboParser parser, GumboToken token);
- maybe_replace_codepoint ()
-
int maybe_replace_codepoint(int codepoint);
- maybe_resize_string_buffer ()
-
void maybe_resize_string_buffer(struct GumboInternalParser * parser, size_t additional_chars, GumboStringBuffer buffer);
- merge_attributes ()
-
void merge_attributes (GumboParser parser, GumboToken token, GumboNode node);
- new_document_node ()
-
GumboNode new_document_node (GumboParser parser);
- node_tag_in ()
-
bool node_tag_in (const GumboNode * node, ... );
- node_tag_is ()
-
bool node_tag_is (const GumboNode * node, GumboTag tag);
- output_init ()
-
void output_init (GumboParser parser);
- parse_digit ()
-
int parse_digit (int c, bool allow_hex);
- parser_state_destroy ()
-
void parser_state_destroy(GumboParser parser);
- parser_state_init ()
-
void parser_state_init (GumboParser parser);
- pop_current_node ()
-
GumboNode pop_current_node (GumboParser parser);
- pop_template_insertion_mode ()
-
void pop_template_insertion_mode(GumboParser parser);
- print_message ()
-
int print_message (GumboParser parser, GumboStringBuffer output, const char * format, ... );
- print_tag_stack ()
-
void print_tag_stack (GumboParser parser, const GumboParserError * error, GumboStringBuffer output);
- push_template_insertion_mode ()
-
void push_template_insertion_mode(GumboParser parser, GumboInsertionMode mode);
- read_char ()
-
void read_char (Utf8Iterator iter);
- reconstruct_active_formatting_elements ()
-
void reconstruct_active_formatting_elements(GumboParser parser);
- record_end_of_element ()
-
void record_end_of_element(GumboToken current_token, GumboElement element);
- reinitialize_tag_buffer ()
-
void reinitialize_tag_buffer(GumboParser parser);
- remove_from_parent ()
-
void remove_from_parent (GumboParser parser, GumboNode node);
- reset_insertion_mode_appropriately ()
-
void reset_insertion_mode_appropriately(GumboParser parser);
- reset_tag_buffer_start_point ()
-
void reset_tag_buffer_start_point(GumboParser parser);
- reset_token_start_point ()
-
void reset_token_start_point(GumboTokenizerState tokenizer);
- run_generic_parsing_algorithm ()
-
void run_generic_parsing_algorithm(GumboParser parser, GumboToken token, GumboTokenizerEnum lexer_state);
- set_frameset_not_ok ()
-
void set_frameset_not_ok (GumboParser parser);
- set_insertion_mode ()
-
void set_insertion_mode (GumboParser parser, GumboInsertionMode mode);
- start_new_tag ()
-
void start_new_tag (GumboParser parser, bool is_start_tag);
- tag_in ()
-
bool tag_in (const GumboToken * token, bool is_start, ... );
- tag_is ()
-
bool tag_is (const GumboToken * token, bool is_start, GumboTag tag);
- temporary_buffer_equals ()
-
bool temporary_buffer_equals(GumboParser parser, const char * text);
- token_has_attribute ()
-
bool token_has_attribute (const GumboToken * token, const char * name);
- update_position ()
-
void update_position (Utf8Iterator iter);
- utf8_is_invalid_code_point ()
-
bool utf8_is_invalid_code_point(int c);
- utf8iterator_current ()
-
int utf8iterator_current(const Utf8Iterator * iter);
- utf8iterator_fill_error_at_mark ()
-
void utf8iterator_fill_error_at_mark(Utf8Iterator iter, struct GumboInternalError * error);
- utf8iterator_get_char_pointer ()
-
const char * utf8iterator_get_char_pointer(const Utf8Iterator * iter);
- utf8iterator_get_position ()
-
void utf8iterator_get_position(const Utf8Iterator * iter, GumboSourcePosition output);
- utf8iterator_init ()
-
void utf8iterator_init (struct GumboInternalParser * parser, const char * source, size_t source_length, Utf8Iterator iter);
- utf8iterator_mark ()
-
void utf8iterator_mark (Utf8Iterator iter);
- utf8iterator_maybe_consume_match ()
-
bool utf8iterator_maybe_consume_match(Utf8Iterator iter, const char * prefix, size_t length, bool case_sensitive);
- utf8iterator_next ()
-
void utf8iterator_next (Utf8Iterator iter);
- utf8iterator_reset ()
-
void utf8iterator_reset (Utf8Iterator iter);