Skip to main content

extract-content-mercury-utils

Documentation / extractor/html-to-content/extract-content/extract-content-mercury-utils

brsToPs()​

function brsToPs(document: any): any;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:349

Parameters​

| Parameter | Type |
| --- | --- |
| `document` | `any` |

Returns​

any


cleanAttributes()​

function cleanAttributes(article: any, document: any): any;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:464

Parameters​

| Parameter | Type |
| --- | --- |
| `article` | `any` |
| `document` | `any` |

Returns​

any


cleanHOnes()​

function cleanHOnes(article: any, document: any): any;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:449

Parameters​

| Parameter | Type |
| --- | --- |
| `article` | `any` |
| `document` | `any` |

Returns​

any


cleanImages()​

function cleanImages(article: any, document: any): any;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:424

Parameters​

| Parameter | Type |
| --- | --- |
| `article` | `any` |
| `document` | `any` |

Returns​

any


convertNodeTo()​

function convertNodeTo(
node: any,
document: any,
tag: string): any;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:326

Parameters​

| Parameter | Type | Default value |
| --- | --- | --- |
| `node` | `any` | `undefined` |
| `document` | `any` | `undefined` |
| `tag` | `string` | `"p"` |

Returns​

any


convertToParagraphs()​

function convertToParagraphs(document: any): any;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:392

Parameters​

| Parameter | Type |
| --- | --- |
| `document` | `any` |

Returns​

any


getAttrs()​

function getAttrs(node: any): any;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:316

Parameters​

| Parameter | Type |
| --- | --- |
| `node` | `any` |

Returns​

any


isWordpress()​

function isWordpress(document: any): boolean;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:645

Parameters​

| Parameter | Type |
| --- | --- |
| `document` | `any` |

Returns​

boolean


linkDensity()​

function linkDensity(node: any): number;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:578

Parameters​

| Parameter | Type |
| --- | --- |
| `node` | `any` |

Returns​

number


nodeIsSufficient()​

function nodeIsSufficient(node: any): boolean;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:640

Parameters​

| Parameter | Type |
| --- | --- |
| `node` | `any` |

Returns​

boolean


normalizeSpaces()​

function normalizeSpaces(text: any): any;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:288

Parameters​

| Parameter | Type |
| --- | --- |
| `text` | `any` |

Returns​

any


paragraphize()​

function paragraphize(
node: any,
document: any,
br: boolean): any;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:293

Parameters​

| Parameter | Type | Default value |
| --- | --- | --- |
| `node` | `any` | `undefined` |
| `document` | `any` | `undefined` |
| `br` | `boolean` | `false` |

Returns​

any


removeEmpty()​

function removeEmpty(article: any): any;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:495

Parameters​

| Parameter | Type |
| --- | --- |
| `article` | `any` |

Returns​

any


removeUnlessContent()​

function removeUnlessContent(node: any, weight: any): void;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:508

Parameters​

| Parameter | Type |
| --- | --- |
| `node` | `any` |
| `weight` | `any` |

Returns​

void


rewriteTopLevel()​

function rewriteTopLevel(article: any, document: any): any;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:565

Parameters​

| Parameter | Type |
| --- | --- |
| `article` | `any` |
| `document` | `any` |

Returns​

any


setAttr()​

function setAttr(
node: any,
attr: any,
val: any): any;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:650

Parameters​

| Parameter | Type |
| --- | --- |
| `node` | `any` |
| `attr` | `any` |
| `val` | `any` |

Returns​

any


setAttrs()​

function setAttrs(node: any, attrs: any): any;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:656

Parameters​

| Parameter | Type |
| --- | --- |
| `node` | `any` |
| `attrs` | `any` |

Returns​

any


stripJunkTags()​

function stripJunkTags(
article: any,
document: any,
tags: any[]): any;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:434

Parameters​

| Parameter | Type | Default value |
| --- | --- | --- |
| `article` | `any` | `undefined` |
| `document` | `any` | `undefined` |
| `tags` | `any[]` | `[]` |

Returns​

any


stripTags()​

function stripTags(text: any, document: any): any;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:597

Parameters​

| Parameter | Type |
| --- | --- |
| `text` | `any` |
| `document` | `any` |

Returns​

any


stripUnlikelyCandidates()​

function stripUnlikelyCandidates(document: any): any;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:605

Parameters​

| Parameter | Type |
| --- | --- |
| `document` | `any` |

Returns​

any


textLength()​

function textLength(text: any): any;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:573

Parameters​

| Parameter | Type |
| --- | --- |
| `text` | `any` |

Returns​

any


withinComment()​

function withinComment(node: any): boolean;

Defined in: extractor/html-to-content/extract-content/extract-content-mercury-utils.js:624

Parameters​

| Parameter | Type |
| --- | --- |
| `node` | `any` |

Returns​

boolean