Extract Content Mercury Utils
ai-research-agent / extractor/html-to-content/extract-content/extract-content-mercury-utils
brsToPs()
function brsToPs(document: any): any;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:349
Parameters
| Parameter | Type |
|---|---|
| `document` | `any` |
Returns
any
cleanAttributes()
function cleanAttributes(article: any, document: any): any;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:464
Parameters
| Parameter | Type |
|---|---|
| `article` | `any` |
| `document` | `any` |
Returns
any
cleanHOnes()
function cleanHOnes(article: any, document: any): any;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:449
Parameters
| Parameter | Type |
|---|---|
| `article` | `any` |
| `document` | `any` |
Returns
any
cleanImages()
function cleanImages(article: any, document: any): any;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:424
Parameters
| Parameter | Type |
|---|---|
| `article` | `any` |
| `document` | `any` |
Returns
any
convertNodeTo()
function convertNodeTo(
node: any,
document: any,
tag: string): any;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:326
Parameters
| Parameter | Type | Default value |
|---|---|---|
| `node` | `any` | see source |
| `document` | `any` | see source |
| `tag` | `string` | see source |
Returns
any
convertToParagraphs()
function convertToParagraphs(document: any): any;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:392
Parameters
| Parameter | Type |
|---|---|
| `document` | `any` |
Returns
any
getAttrs()
function getAttrs(node: any): any;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:316
Parameters
| Parameter | Type |
|---|---|
| `node` | `any` |
Returns
any
isWordpress()
function isWordpress(document: any): boolean;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:645
Parameters
| Parameter | Type |
|---|---|
| `document` | `any` |
Returns
boolean
linkDensity()
function linkDensity(node: any): number;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:578
Parameters
| Parameter | Type |
|---|---|
| `node` | `any` |
Returns
number
nodeIsSufficient()
function nodeIsSufficient(node: any): boolean;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:640
Parameters
| Parameter | Type |
|---|---|
| `node` | `any` |
Returns
boolean
normalizeSpaces()
function normalizeSpaces(text: any): any;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:288
Parameters
| Parameter | Type |
|---|---|
| `text` | `any` |
Returns
any
paragraphize()
function paragraphize(
node: any,
document: any,
br: boolean): any;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:293
Parameters
| Parameter | Type | Default value |
|---|---|---|
| `node` | `any` | see source |
| `document` | `any` | see source |
| `br` | `boolean` | see source |
Returns
any
removeEmpty()
function removeEmpty(article: any): any;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:495
Parameters
| Parameter | Type |
|---|---|
| `article` | `any` |
Returns
any
removeUnlessContent()
function removeUnlessContent(node: any, weight: any): void;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:508
Parameters
| Parameter | Type |
|---|---|
| `node` | `any` |
| `weight` | `any` |
Returns
void
rewriteTopLevel()
function rewriteTopLevel(article: any, document: any): any;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:565
Parameters
| Parameter | Type |
|---|---|
| `article` | `any` |
| `document` | `any` |
Returns
any
setAttr()
function setAttr(
node: any,
attr: any,
val: any): any;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:650
Parameters
| Parameter | Type |
|---|---|
| `node` | `any` |
| `attr` | `any` |
| `val` | `any` |
Returns
any
setAttrs()
function setAttrs(node: any, attrs: any): any;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:656
Parameters
| Parameter | Type |
|---|---|
| `node` | `any` |
| `attrs` | `any` |
Returns
any
stripJunkTags()
function stripJunkTags(
article: any,
document: any,
tags: any[]): any;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:434
Parameters
| Parameter | Type | Default value |
|---|---|---|
| `article` | `any` | see source |
| `document` | `any` | see source |
| `tags` | `any[]` | see source |
Returns
any
stripTags()
function stripTags(text: any, document: any): any;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:597
Parameters
| Parameter | Type |
|---|---|
| `text` | `any` |
| `document` | `any` |
Returns
any
stripUnlikelyCandidates()
function stripUnlikelyCandidates(document: any): any;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:605
Parameters
| Parameter | Type |
|---|---|
| `document` | `any` |
Returns
any
textLength()
function textLength(text: any): any;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:573
Parameters
| Parameter | Type |
|---|---|
| `text` | `any` |
Returns
any
withinComment()
function withinComment(node: any): boolean;
Defined in: src/extractor/html-to-content/extract-content/extract-content-mercury-utils.js:624
Parameters
| Parameter | Type |
|---|---|
| `node` | `any` |
Returns
boolean