Table of Contents

Class HtmlAgilityHelper

Namespace
SunamoHtml
Assembly
SunamoHtml.dll

HtmlHelperText - for methods which do NOT operate on HtmlAgilityHelper. HtmlAgilityHelper - for getting new nodes. HtmlAssistant - only for methods which operate on HtmlAgilityHelper.

public class HtmlAgilityHelper
Inheritance
HtmlAgilityHelper
Inherited Members
Extension Methods

Fields

TextNode

Constant representing the text node type in HTML DOM.

public const string TextNode = "#text"

Field Value

string

Methods

CreateHtmlDocument(CreateHtmlDocumentInitData?)

Creates an HTML document with specific initialization options.

public static HtmlDocument CreateHtmlDocument(CreateHtmlDocumentInitData? data = null)

Parameters

data CreateHtmlDocumentInitData

Initialization data, or null for default settings.

Returns

HtmlDocument

Configured HTML document instance.

CreateNode(string)

Creates an HTML node from the given HTML string, wrapping non-tag content with spaces.

public static HtmlNode CreateNode(string html)

Parameters

html string

The HTML string to create a node from.

Returns

HtmlNode

The created HTML node.

FindAncestorParentNode(HtmlNode, string)

Finds an ancestor parent node with the specified tag name.

public static HtmlNode? FindAncestorParentNode(HtmlNode node, string tagName)

Parameters

node HtmlNode

The starting HTML node.

tagName string

The tag name to search for in ancestors.

Returns

HtmlNode

The ancestor node with matching tag name, or null if not found.

HasAncestorParentNode(HtmlNode, string)

Checks if the node has an ancestor with the specified tag name.

public static bool HasAncestorParentNode(HtmlNode node, string tagName)

Parameters

node HtmlNode

The starting HTML node.

tagName string

The tag name to search for in ancestors.

Returns

bool

True if an ancestor with the tag name exists, false otherwise.

InsertGroup(HtmlNode, List<string>)

Inserts a group of strings as inner HTML of the specified node, wrapping each string with spaces.

public static void InsertGroup(HtmlNode insertAfter, List<string> list)

Parameters

insertAfter HtmlNode

The HTML node to insert content into.

list List<string>

List of strings to insert as inner HTML.

Node(HtmlNode, bool, string)

Finds the first HTML node matching the specified tag within the given node.

public static HtmlNode? Node(HtmlNode node, bool recursive, string tag)

Parameters

node HtmlNode

The parent HTML node to search within.

recursive bool

Whether to search recursively in child nodes.

tag string

The HTML tag name to search for.

Returns

HtmlNode

The first matching HTML node, or null if not found.

NodeWithAttr(HtmlNode, bool, string, string, string, bool)

Finds the first HTML node with the specified tag whose attribute matches the given value. Returns null if not found.

public static HtmlNode? NodeWithAttr(HtmlNode node, bool recursive, string tag, string attr, string attrValue, bool contains = false)

Parameters

node HtmlNode
recursive bool
tag string
attr string
attrValue string
contains bool

Returns

HtmlNode

Nodes(HtmlNode, bool, string)

Gets all nodes with the specified tag name.

public static List<HtmlNode> Nodes(HtmlNode node, bool isRecursive, string tag)

Parameters

node HtmlNode

The HTML node to search in.

isRecursive bool

Whether to search recursively.

tag string

The tag name to search for.

Returns

List<HtmlNode>

List of matching HTML nodes with text nodes trimmed.

NodesWhichContainsInAttr(HtmlNode, bool, string, string, string, bool)

Finds all HTML nodes where the specified attribute contains the given value.

public static IList<HtmlNode> NodesWhichContainsInAttr(HtmlNode node, bool recursive, string tag, string attr, string attrValue, bool searchAsSingleString = true)

Parameters

node HtmlNode

The parent HTML node to search within.

recursive bool

Whether to search recursively in child nodes.

tag string

The HTML tag name to search for.

attr string

The attribute name to check.

attrValue string

The value to search for within the attribute.

searchAsSingleString bool

Whether to search the attribute value as a single string (true) or split by whitespace (false).

Returns

IList<HtmlNode>

List of matching HTML nodes.

NodesWithAttr(HtmlNode, bool, string, string, string, bool)

Gets nodes whose attribute matches the given value (exact match by default; contains matching when isContains is true).

public static IList<HtmlNode> NodesWithAttr(HtmlNode node, bool isRecursive, string tag, string attributeName, string attributeValue, bool isContains = false)

Parameters

node HtmlNode

The HTML node to search in.

isRecursive bool

Whether to search recursively.

tag string

The tag name to search for.

attributeName string

The attribute name to match.

attributeValue string

The attribute value to match.

isContains bool

Whether to use contains matching.

Returns

IList<HtmlNode>

List of matching HTML nodes.

NodesWithAttrWildCard(HtmlNode, bool, string, string, string, bool)

Gets nodes with attribute matching wildcard pattern.

public static IList<HtmlNode> NodesWithAttrWildCard(HtmlNode node, bool isRecursive, string tag, string attributeName, string attributeValue, bool isContains = false)

Parameters

node HtmlNode

The HTML node to search in.

isRecursive bool

Whether to search recursively.

tag string

The tag name to search for.

attributeName string

The attribute name to match.

attributeValue string

The attribute value pattern.

isContains bool

Whether to use contains matching.

Returns

IList<HtmlNode>

List of matching HTML nodes.

PairsDdDt(HtmlNode, bool, Dictionary<string, string>)

Extracts key-value pairs from HTML definition list (DL) by pairing DT (term) and DD (definition) elements.

public static Dictionary<string, string> PairsDdDt(HtmlNode dl, bool recursive, Dictionary<string, string> replaceHtmLForText)

Parameters

dl HtmlNode

The DL (definition list) HTML node to parse.

recursive bool

Whether to search recursively in child nodes.

replaceHtmLForText Dictionary<string, string>

Dictionary of HTML replacements to apply to extracted text.

Returns

Dictionary<string, string>

Dictionary with DT text as keys and DD text as values.

RecursiveReturnTags(List<HtmlNode>, HtmlNode, bool, bool, string)

Recursively returns HTML tags matching the specified tag name. If isSingle is true, returns only the first match (like Node vs Nodes). Use "*" as tagName to match any tag.

public static void RecursiveReturnTags(List<HtmlNode> result, HtmlNode htmlNode, bool isRecursive, bool isSingle, string tagName)

Parameters

result List<HtmlNode>

The list to add found nodes to.

htmlNode HtmlNode

The HTML node to search in.

isRecursive bool

Whether to search recursively.

isSingle bool

Whether to stop after finding first match.

tagName string

The tag name to search for, or "*" for any tag.

RecursiveReturnTagsWithContainsAttr(List<HtmlNode>, HtmlNode, bool, string, string, string, bool, bool)

Recursively returns tags with attribute containing specified value.

public static void RecursiveReturnTagsWithContainsAttr(List<HtmlNode> result, HtmlNode htmlNode, bool isRecursive, string tagName, string attributeName, string attributeValue, bool isEnoughContainsAttribute, bool isSearchAsSingleString = true)

Parameters

result List<HtmlNode>

The list to add found nodes to.

htmlNode HtmlNode

The HTML node to search in.

isRecursive bool

Whether to search recursively.

tagName string

The tag name to search for.

attributeName string

The attribute name to match.

attributeValue string

The attribute value to match.

isEnoughContainsAttribute bool

Whether partial match is sufficient.

isSearchAsSingleString bool

Whether to search as a single string.

RecursiveReturnTagsWithContainsAttr(List<HtmlNode>, HtmlNode, bool, string, string, string, bool, bool, bool)

Recursively returns tags with attribute containing specified value. Use "*" in tagName to return all tags.

public static void RecursiveReturnTagsWithContainsAttr(List<HtmlNode> result, HtmlNode htmlNode, bool isRecursive, string tagName, string attributeName, string attributeValue, bool isWildCard, bool isEnoughContainsAttribute, bool isSearchAsSingleString = true)

Parameters

result List<HtmlNode>

The list to add found nodes to.

htmlNode HtmlNode

The HTML node to search in.

isRecursive bool

Whether to search recursively.

tagName string

The tag name to search for, or "*" for all tags.

attributeName string

The attribute name to match.

attributeValue string

The attribute value to match.

isWildCard bool

Whether to use wildcard matching.

isEnoughContainsAttribute bool

Whether partial match is sufficient.

isSearchAsSingleString bool

Whether to search as a single string.

ReplacePlainUriForAnchors(HtmlDocument, string)

Replaces plain URIs in text with HTML anchor tags using the provided HtmlDocument.

[SuppressMessage("Design", "CA1055:UriReturnValuesShouldNotBeStrings")]
public static string ReplacePlainUriForAnchors(HtmlDocument htmlDocument, string html)

Parameters

htmlDocument HtmlDocument

The HtmlDocument to use for parsing.

html string

The HTML string to process.

Returns

string

HTML string with plain URIs converted to anchor tags.

ReplacePlainUriForAnchors(string)

Replaces plain URIs in text with HTML anchor tags.

[SuppressMessage("Design", "CA1055:UriReturnValuesShouldNotBeStrings")]
public static string ReplacePlainUriForAnchors(string html)

Parameters

html string

The HTML string to process.

Returns

string

HTML string with plain URIs converted to anchor tags.

TrimComments(IList<HtmlNode>)

Removes comment nodes from a list of HTML nodes.

public static IList<HtmlNode> TrimComments(IList<HtmlNode> nodes)

Parameters

nodes IList<HtmlNode>

The list of HTML nodes to process.

Returns

IList<HtmlNode>

List of nodes with comments removed.

TrimTexts(HtmlNodeCollection)

Removes text nodes from an HTML node collection, keeping everything else.

public static List<HtmlNode> TrimTexts(HtmlNodeCollection htmlNodeCollection)

Parameters

htmlNodeCollection HtmlNodeCollection

The HTML node collection to trim.

Returns

List<HtmlNode>

List of nodes with text nodes removed.

TrimTexts(List<HtmlNode>)

Removes text nodes but not comment nodes from a list of HTML nodes.

public static List<HtmlNode> TrimTexts(List<HtmlNode> nodes)

Parameters

nodes List<HtmlNode>

The list of HTML nodes to trim.

Returns

List<HtmlNode>

List of nodes with text nodes removed.

WrapIntoTagIfNot(string, string)

Wraps the input string in an HTML tag if it doesn't already start with a tag.

public static string WrapIntoTagIfNot(string html, string tag = "div")

Parameters

html string

The string to potentially wrap.

tag string

The HTML tag to use for wrapping (default is div).

Returns

string

The wrapped HTML string.