Functions / Extractor / Html to content / Extract content

Extract Content Mercury Utils

ai-research-agent / extractor/html-to-content/extract-content/extract-content-mercury-utils

brsToPs()

function brsToPs(document: any): any;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:349

Parameters

| Parameter | Type |
| --- | --- |
| document | any |

Returns

any


cleanAttributes()

function cleanAttributes(article: any, document: any): any;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:464

Parameters

| Parameter | Type |
| --- | --- |
| article | any |
| document | any |

Returns

any


cleanHOnes()

function cleanHOnes(article: any, document: any): any;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:449

Parameters

| Parameter | Type |
| --- | --- |
| article | any |
| document | any |

Returns

any


cleanImages()

function cleanImages(article: any, document: any): any;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:424

Parameters

| Parameter | Type |
| --- | --- |
| article | any |
| document | any |

Returns

any


convertNodeTo()

function convertNodeTo(
   node: any, 
   document: any, 
   tag: string): any;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:326

Parameters

| Parameter | Type | Default value |
| --- | --- | --- |
| node | any | undefined |
| document | any | undefined |
| tag | string | "p" |

Returns

any


convertToParagraphs()

function convertToParagraphs(document: any): any;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:392

Parameters

| Parameter | Type |
| --- | --- |
| document | any |

Returns

any


getAttrs()

function getAttrs(node: any): any;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:316

Parameters

| Parameter | Type |
| --- | --- |
| node | any |

Returns

any


isWordpress()

function isWordpress(document: any): boolean;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:645

Parameters

| Parameter | Type |
| --- | --- |
| document | any |

Returns

boolean


linkDensity()

function linkDensity(node: any): number;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:578

Parameters

| Parameter | Type |
| --- | --- |
| node | any |

Returns

number


nodeIsSufficient()

function nodeIsSufficient(node: any): boolean;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:640

Parameters

| Parameter | Type |
| --- | --- |
| node | any |

Returns

boolean


normalizeSpaces()

function normalizeSpaces(text: any): any;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:288

Parameters

| Parameter | Type |
| --- | --- |
| text | any |

Returns

any


paragraphize()

function paragraphize(
   node: any, 
   document: any, 
   br: boolean): any;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:293

Parameters

| Parameter | Type | Default value |
| --- | --- | --- |
| node | any | undefined |
| document | any | undefined |
| br | boolean | false |

Returns

any


removeEmpty()

function removeEmpty(article: any): any;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:495

Parameters

| Parameter | Type |
| --- | --- |
| article | any |

Returns

any


removeUnlessContent()

function removeUnlessContent(node: any, weight: any): void;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:508

Parameters

| Parameter | Type |
| --- | --- |
| node | any |
| weight | any |

Returns

void


rewriteTopLevel()

function rewriteTopLevel(article: any, document: any): any;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:565

Parameters

| Parameter | Type |
| --- | --- |
| article | any |
| document | any |

Returns

any


setAttr()

function setAttr(
   node: any, 
   attr: any, 
   val: any): any;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:650

Parameters

| Parameter | Type |
| --- | --- |
| node | any |
| attr | any |
| val | any |

Returns

any


setAttrs()

function setAttrs(node: any, attrs: any): any;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:656

Parameters

| Parameter | Type |
| --- | --- |
| node | any |
| attrs | any |

Returns

any


stripJunkTags()

function stripJunkTags(
   article: any, 
   document: any, 
   tags: any[]): any;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:434

Parameters

| Parameter | Type | Default value |
| --- | --- | --- |
| article | any | undefined |
| document | any | undefined |
| tags | any[] | [] |

Returns

any


stripTags()

function stripTags(text: any, document: any): any;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:597

Parameters

| Parameter | Type |
| --- | --- |
| text | any |
| document | any |

Returns

any


stripUnlikelyCandidates()

function stripUnlikelyCandidates(document: any): any;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:605

Parameters

| Parameter | Type |
| --- | --- |
| document | any |

Returns

any


textLength()

function textLength(text: any): any;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:573

Parameters

| Parameter | Type |
| --- | --- |
| text | any |

Returns

any


withinComment()

function withinComment(node: any): boolean;

Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:624

Parameters

| Parameter | Type |
| --- | --- |
| node | any |

Returns

boolean