// JSDoc: Source: parser.js

// Licensed under the MIT License
// https://github.com/craigahobbs/markdown-model/blob/main/LICENSE

/** @module lib/parser */


/**
 * Backslash-escape the Markdown-significant characters of a string so that it
 * can be embedded in Markdown text
 *
 * @param {string} text - The text to escape
 * @returns {string} The text with each special character preceded by a backslash
 */
export function escapeMarkdownText(text) {
    const escapedText = text.replaceAll(rEscapeMarkdownText, (char) => `\\${char}`);
    return escapedText;
}

// The characters that escapeMarkdownText prefixes with a backslash
const rEscapeMarkdownText = /([\\[\]()<>"'*_~`#=+|-])/g;


/**
 * Get a Markdown model's title - the text of the first paragraph part that has a style.
 * Returns null if the model has no such part.
 *
 * @param {Object} markdown - The [Markdown model]{@link https://craigahobbs.github.io/markdown-model/model/#var.vName='Markdown'}
 * @returns {string|null}
 */
export function getMarkdownTitle(markdown) {
    const titlePart = markdown.parts.find((part) => 'paragraph' in part && 'style' in part.paragraph);
    if (typeof titlePart === 'undefined') {
        return null;
    }
    return getMarkdownParagraphText(titlePart.paragraph);
}


/**
 * Get the concatenated text of a Markdown paragraph model's spans
 *
 * @param {Object} paragraph - The
 *     [Markdown paragraph model]{@link https://craigahobbs.github.io/markdown-model/model/#var.vName='Paragraph'}
 * @returns {string}
 */
export function getMarkdownParagraphText(paragraph) {
    const spanTexts = [];
    for (const span of paragraph.spans) {
        spanTexts.push(getMarkdownSpanText(span));
    }
    return spanTexts.join('');
}


// Helper function to get a Markdown span model's text. Container spans (link, link
// reference, style) yield the concatenated text of their child spans; an image yields
// its alt text, a code span its code, and a line break a newline.
function getMarkdownSpanText(span) {
    if ('br' in span) {
        return '\n';
    } else if ('code' in span) {
        return span.code;
    } else if ('image' in span) {
        return span.image.alt;
    } else if ('link' in span) {
        return span.link.spans.map(getMarkdownSpanText).join('');
    } else if ('linkRef' in span) {
        // Holds either the resolved link/image span or the fallback spans
        return span.linkRef.spans.map(getMarkdownSpanText).join('');
    } else if ('style' in span) {
        return span.style.spans.map(getMarkdownSpanText).join('');
    }
    return span.text;
}


// Markdown block-level regex. None of the line-matching expressions are global, so they
// carry no "lastIndex" state between uses.
const rLineSplit = /\r?\n/;
const rParagraphEmpty = /^\s*$/;
const rIndent = /^(?<indent> *)(?<notIndent>.*)$/;
const rHeading = /^ {0,3}(?<heading>#{1,6})\s+(?<text>.*?)(?:\s+#+)?\s*$/;
const rHeadingAlt = /^ {0,3}(?<heading>=+|-+)\s*$/;
const rHorizontal = /^ {0,3}(?:(?:\*\s*){3,}|(?:-\s*){3,}|(?:_\s*){3,})$/;
const rFenced = /^(?<indent> {0,3})(?<fence>(?:`{3,}|~{3,}))(?:\s*(?<language>.+?))?\s*$/;
const rList = /^(?<indent> {0,3}(?<mark>-|\*|\+|[0-9][.)]|[1-9][0-9]+[.)])\s)(?<line>.*)$/;
const rQuote = /^(?<indent> {0,3}>\s?)/;
const rTable = /^ {0,3}(?::?-+:?\s*)?(?:\|\s*:?-+:?\s*)+(?:\|\s*)?$/;
const rTableRow = /^ {0,3}(?:(?:\\\||[^|])+\s*)?(?:\|\s*(?:\\\||[^|])*?\s*)+(?:\|\s*)?/;
const rTableRowTrim = /^\s*\|?/;
const rTableCell = /^\s*(?<cell>(?:\\\||[^|])*?)\s*\|/;

// Matches an escaped pipe only ("\|"); group 1 is the pipe. Every other backslash escape
// is left in the cell text for the span parser to handle.
const rTableEscape = /\\(\|)/g;


/**
 * Parse markdown text or text lines into a Markdown model
 *
 * @param {string|string[]} markdown - Markdown text or text lines. Null text lines are ignored.
 * @param {number} [startLineNumber = 1] - The starting line number of the markdown text
 * @returns {Object} The [Markdown model]{@link https://craigahobbs.github.io/markdown-model/model/#var.vName='Markdown'}
 */
export function parseMarkdown(markdown, startLineNumber = 1) {
    // No link reference state is passed in - this is the top-level parse
    const linkRefsRaw = null;
    const markdownModel = parseMarkdownInternal(markdown, startLineNumber, linkRefsRaw);
    return markdownModel;
}

// Parse markdown text or text lines into a Markdown model.
//
// markdown - the markdown text, or an array of text strings (null entries are skipped)
// startLineNumber - the line number of the first line
// linkRefsRaw - null for the top-level parse. Nested parses (block quotes and list items)
//     pass the shared link reference state so that link reference definitions and usages
//     are collected in one place; only the top-level parse resolves them.
//
// Returns the Markdown model object, {'parts': [...]}.
function parseMarkdownInternal(markdown, startLineNumber, linkRefsRaw) {
    // The definitions map has no prototype so that reference keys such as "constructor"
    // or "__proto__" are not mistaken for existing definitions by the "in" checks below
    const linkRefs = linkRefsRaw ?? {'defs': Object.create(null), 'links': []};
    const markdownParts = [];
    let paragraphLines = [];
    let paragraphPart = null;
    let paragraphLineNumber = null;
    let tablePart = null;
    let fencedMark = null;
    let fencedIndent = null;
    let listIndent = null;
    let lineNumber = startLineNumber - 1;

    // Helper function to close the current part
    const closeParagraph = (paragraphStyle = null) => {
        // Block quote "paragraph"
        if (paragraphPart !== null && 'quote' in paragraphPart) {
            // Parse the block quote's Markdown lines
            paragraphPart.quote.parts = parseMarkdownInternal(paragraphLines, paragraphLineNumber, linkRefs).parts;
            paragraphLines = [];

        // List item "paragraph"?
        } else if (paragraphPart !== null && 'list' in paragraphPart) {
            // Parse the list item's Markdown lines
            const {items} = paragraphPart.list;
            items[items.length - 1].parts = parseMarkdownInternal(paragraphLines, paragraphLineNumber, linkRefs).parts;
            paragraphLines = [];

        // Code block "paragraph"?
        } else if (paragraphPart !== null && 'codeBlock' in paragraphPart) {
            // Set the code block lines - strip trailing blank lines of non-fenced code blocks
            let ixLine = paragraphLines.length - 1;
            if (fencedMark === null) {
                for (; ixLine >= 0; ixLine--) {
                    if (paragraphLines[ixLine] !== '') {
                        break;
                    }
                }
            }
            paragraphPart.codeBlock.lines = paragraphLines.slice(0, ixLine + 1);
            paragraphLines = [];

        // Ordinary (or header) paragraph...
        } else if (paragraphLines.length) {
            // Process link reference definitions at the start of the paragraph text
            let text = paragraphLines.join('\n');
            let matchLinkDef = text.match(rLinkDef);
            while (matchLinkDef !== null) {
                const [linkText, linkHref, linkTitle] = getLinkText(matchLinkDef.groups, 'link');

                // Empty link reference key? If so, do nothing...
                const linkRefKey = getLinkRefKey(linkText);
                if (linkRefKey === '') {
                    break;
                }

                // Record the link reference definition (unless it's already defined)
                if (!(linkRefKey in linkRefs.defs)) {
                    linkRefs.defs[linkRefKey] = {linkText, linkHref, linkTitle};
                }

                // Check for more link reference definitions
                text = text.slice(matchLinkDef[0].length);
                matchLinkDef = text.match(rLinkDef);
            }

            // Parse the paragraph spans (if there's any text left). A styled (heading)
            // paragraph is always added, even if its text is empty.
            if (paragraphStyle !== null || !rParagraphEmpty.test(text)) {
                const partSpans = paragraphSpans(text, linkRefs);
                const part = {'paragraph': {'spans': partSpans}};
                if (paragraphStyle !== null) {
                    part.paragraph.style = paragraphStyle;
                }
                markdownParts.push(part);
            }
            paragraphLines = [];
        }

        // Clear paragraph state
        paragraphPart = null;
        paragraphLineNumber = null;
        tablePart = null;
    };

    // Process markdown text line by line
    let emptyLine = true;
    let emptyLinePrev;
    const markdownStrings = (typeof markdown === 'string' ? [markdown] : markdown);
    for (const markdownString of markdownStrings) {
        if (markdownString === null) {
            continue;
        }
        for (const lineRaw of markdownString.split(rLineSplit)) {
            // Tabs are treated as four spaces
            const line = lineRaw.replaceAll('\t', '    ');
            const matchLine = line.match(rIndent);
            const lineIndent = matchLine.groups.indent.length;
            emptyLinePrev = emptyLine;
            emptyLine = matchLine.groups.notIndent === '';
            lineNumber += 1;

            // Empty line?
            if (emptyLine) {
                // If there is a container part, add the empty line to the part
                if (paragraphPart !== null && !('quote' in paragraphPart)) {
                    paragraphLines.push(line);
                } else {
                    closeParagraph();
                }
                continue;
            }

            // Within fenced code block?
            const matchFenced = line.match(rFenced);
            if (fencedMark !== null) {
                // Fenced code block end? The closing fence must start with the opening
                // fence and must not have a language.
                if (matchFenced !== null && matchFenced.groups.fence.startsWith(fencedMark) &&
                    typeof matchFenced.groups.language === 'undefined') {
                    closeParagraph();
                    fencedMark = null;
                    fencedIndent = null;
                } else {
                    // Remove up to the opening fence's indent from the code line
                    paragraphLines.push(line.slice(Math.min(fencedIndent, lineIndent)));
                }
                continue;
            }

            // List item line?
            if (listIndent !== null && lineIndent >= listIndent) {
                paragraphLines.push(line.slice(listIndent));
                continue;
            }

            // Code block line?
            if (lineIndent >= 4 && paragraphPart !== null && 'codeBlock' in paragraphPart) {
                paragraphLines.push(line.slice(4));
                continue;
            }

            // New code block?
            if (lineIndent >= 4 && (emptyLinePrev || paragraphLines.length === 0)) {
                closeParagraph();
                paragraphPart = {'codeBlock': {'startLineNumber': lineNumber}};
                markdownParts.push(paragraphPart);
                paragraphLineNumber = lineNumber;
                paragraphLines.push(line.slice(4));
                continue;
            }

            // Fenced code start?
            if (matchFenced !== null) {
                closeParagraph();
                paragraphPart = {'codeBlock': {'startLineNumber': lineNumber}};
                if (typeof matchFenced.groups.language !== 'undefined') {
                    paragraphPart.codeBlock.language = matchFenced.groups.language;
                }
                markdownParts.push(paragraphPart);
                paragraphLineNumber = lineNumber + 1;
                fencedMark = matchFenced.groups.fence;
                fencedIndent = matchFenced.groups.indent.length;
                continue;
            }

            // Block quote?
            const matchQuote = line.match(rQuote);
            if (matchQuote !== null) {
                if (paragraphPart === null || !('quote' in paragraphPart)) {
                    closeParagraph();
                    paragraphPart = {'quote': {}};
                    markdownParts.push(paragraphPart);
                    paragraphLineNumber = lineNumber;
                }
                paragraphLines.push(line.slice(matchQuote.groups.indent.length));
                continue;
            }

            // Heading?
            const matchHeading = line.match(rHeading);
            if (matchHeading !== null) {
                closeParagraph();
                paragraphLines = [matchHeading.groups.text];
                closeParagraph(`h${matchHeading.groups.heading.length}`);
                continue;
            }

            // Heading (alternate syntax)? The underline turns the open paragraph into a heading.
            const matchHeadingAlt = line.match(rHeadingAlt);
            if (matchHeadingAlt !== null && paragraphLines.length !== 0 && paragraphPart === null) {
                closeParagraph(matchHeadingAlt.groups.heading.startsWith('=') ? 'h1' : 'h2');
                continue;
            }

            // Horizontal rule?
            if (rHorizontal.test(line)) {
                closeParagraph();
                markdownParts.push({'hr': 1});
                continue;
            }

            // List?
            const matchList = line.match(rList);
            if (matchList !== null) {
                const curList = (paragraphPart !== null && 'list' in paragraphPart ? paragraphPart : null);
                const curListIsNumbered = (curList !== null && typeof curList.list.start === 'number');

                // A bullet mark parses as NaN; a numbered mark parses as its number
                const start = parseInt(matchList.groups.mark, 10);
                const isNumbered = !isNaN(start);

                // Close current paragraph
                closeParagraph();

                // New list?
                if (curList === null || curListIsNumbered !== isNumbered) {
                    paragraphPart = {'list': {'items': [{}]}};
                    if (isNumbered) {
                        paragraphPart.list.start = start;
                    }
                    markdownParts.push(paragraphPart);
                } else {
                    paragraphPart = curList;
                    curList.list.items.push({});
                }
                paragraphLineNumber = lineNumber;

                // Add the list item line
                paragraphLines.push(matchList.groups.line);
                listIndent = matchList.groups.indent.length;
                continue;
            }

            // Table?
            if (tablePart !== null) {
                // Table row?
                const matchTableRow = line.match(rTableRow);
                if (matchTableRow !== null) {
                    if (!('rows' in tablePart.table)) {
                        tablePart.table.rows = [];
                    }

                    // Cells beyond the header count are dropped
                    const cells = parseTableCells(line);
                    if (cells.length > tablePart.table.headers.length) {
                        cells.length = tablePart.table.headers.length;
                    }
                    tablePart.table.rows.push(cells.map((cell) => paragraphSpans(cell, linkRefs)));
                    continue;
                } else {
                    tablePart = null;
                }
            } else {
                // Table delimiter following a table header?
                const tableHeader = (paragraphLines.length !== 0 ? paragraphLines[paragraphLines.length - 1] : null);
                const matchTable = (tableHeader !== null ? line.match(rTable) : null);
                const matchTableHeader = (tableHeader !== null && matchTable !== null ? tableHeader.match(rTableRow) : null);
                if (matchTableHeader !== null) {
                    // Does the table header match the delimiter?
                    const headers = parseTableCells(tableHeader).map((cell) => paragraphSpans(cell, linkRefs));
                    const aligns = parseTableCells(line).map(
                        (cell) => (cell.endsWith(':') ? (cell.startsWith(':') ? 'center' : 'right') : 'left')
                    );
                    if (headers.length === aligns.length) {
                        // Remove the table header line and close the open paragraph
                        paragraphLines.length -= 1;
                        closeParagraph();

                        // Add the table markdown part
                        tablePart = {'table': {headers, aligns}};
                        markdownParts.push(tablePart);
                        continue;
                    }
                }
            }

            // End code block?
            if (paragraphPart !== null && 'codeBlock' in paragraphPart) {
                closeParagraph();

            // End list?
            } else if (listIndent !== null && emptyLinePrev) {
                closeParagraph();
                listIndent = null;
            }

            // Add the paragraph line
            paragraphLines.push(line);
        }
    }

    // Close current paragraph
    closeParagraph();

    // Resolve link references (top-level parse only)
    if (linkRefsRaw === null) {
        for (const linkRef of linkRefs.links) {
            // References without a definition keep their fallback spans
            if (linkRef.refKey in linkRefs.defs) {
                const {linkHref, linkTitle = null} = linkRefs.defs[linkRef.refKey];
                const {linkSpan = null} = linkRef;
                if (linkSpan !== null) {
                    linkSpan.link.href = linkHref;
                    if (linkTitle !== null) {
                        linkSpan.link.title = linkTitle;
                    }
                    linkRef.linkRefSpan.linkRef.spans = [linkSpan];
                } else {
                    const {imageSpan} = linkRef;
                    imageSpan.image.src = linkHref;
                    if (linkTitle !== null) {
                        imageSpan.image.title = linkTitle;
                    }
                    linkRef.linkRefSpan.linkRef.spans = [imageSpan];
                }
            }
        }
    }

    return {'parts': markdownParts};
}


// Helper function to parse a table line's cells. Returns the array of trimmed cell text
// strings. The table-level escape replacement is applied to every cell, including a
// final cell that has no closing pipe.
function parseTableCells(line) {
    const cells = [];
    let matchCell;

    // Remove the leading whitespace and optional leading pipe
    let lineText = line.replace(rTableRowTrim, '');

    // Consume the pipe-terminated cells
    while ((matchCell = lineText.match(rTableCell)) !== null) {
        cells.push(matchCell.groups.cell.replaceAll(rTableEscape, '$1'));
        lineText = lineText.slice(matchCell[0].length);
    }

    // Any remaining text is the final, unterminated cell
    lineText = lineText.trim();
    if (lineText !== '') {
        cells.push(lineText.replaceAll(rTableEscape, '$1'));
    }
    return cells;
}


// Markdown span regex

// Regex source fragments for link text and for a link destination with optional title.
// The group names are rewritten with "replaceAll" for each usage in "rSpans" below.
const rLinkText = '(?<linkText>(?:\\\\.|[^\\\\\\]])*)';
const rLinkHref = '[ \\r\\n]*(?<linkHref>' +
      '<(?:\\\\[^\\r\\n]|[^\\r\\n>\\\\])*>|' +
      '(?!<)(?:\\\\[^ \\r\\n]|[^ \\r\\n\\\\)])*' +
      ')' +
      '(?:[ \\r\\n]+(?<linkTitle>' +
      '"(?:\\\\.|[^\\\\"])*"|' +
      "'(?:\\\\.|[^\\\\'])*'|" +
      '\\((?:\\\\.|[^\\\\)])*\\)' +
      '))?[ \\r\\n]*';

// The span regex - exactly one of the top-level named groups is set for each match
const rSpans = new RegExp(
    '(?<br>(?: {2,}|(?<!\\\\)\\\\)\\r?\\n)|' +
    `(?<linkImg>\\[\\s*!\\[${rLinkText.replaceAll('<link', '<linkImg')}\\]` +
        `\\(${rLinkHref.replaceAll('<link', '<linkImg')}\\)\\s*\\]` +
        `\\(${rLinkHref.replaceAll('<link', '<linkImgLink')}\\))|` +
    `(?<linkImgRef>\\[(?<linkImgRefFull>\\s*!(?:\\[${rLinkText.replaceAll('<link', '<linkImgRefImg')}\\])?` +
        `\\[${rLinkText.replaceAll('<link', '<linkImgRef')}\\])\\s*\\]` +
        `\\(${rLinkHref.replaceAll('<link', '<linkImgRefLink')}\\))|` +
    `(?<linkRefImg>\\[\\s*!\\[${rLinkText.replaceAll('<link', '<linkRefImg')}\\]` +
        `\\(${rLinkHref.replaceAll('<link', '<linkRefImg')}\\)\\s*\\]` +
        `\\[${rLinkText.replaceAll('<link', '<linkRefImgLink')}\\])|` +
    `(?<linkRefImgRef>\\[(?<linkRefImgRefFull>\\s*!(?:\\[${rLinkText.replaceAll('<link', '<linkRefImgRefImg')}\\])?` +
        `\\[${rLinkText.replaceAll('<link', '<linkRefImgRef')}\\])\\s*\\]` +
        `\\[${rLinkText.replaceAll('<link', '<linkRefImgRefLink')}\\])|` +
    `(?<link>!?\\[${rLinkText}\\]\\(${rLinkHref}\\))|` +
    `(?<linkRef>(?<!\\\\)!?(?:\\[${rLinkText.replaceAll('<link', '<linkRefOther')}\\])?` +
        `\\[${rLinkText.replaceAll('<link', '<linkRef')}\\])|` +
    // The scheme is three or more letters followed by a colon, or an email local part followed by "@"
    '(?<linkAlt><(?<linkAltScheme>[A-Za-z]{3,}:|[a-zA-Z0-9.!#$%&\'*+/=?^_`{|}~-]+@)[^ \\r\\n]+>)|' +
    '(?<bold>(?<!\\\\)(?<!\\*)\\*{2,}(?!\\**\\s)(?:[^\\s\\\\*]|\\\\.|\\s(?!\\*{2,})|\\*(?!\\*))+\\*{2,})|' +
    '(?<italic>(?<!\\\\)(?<!\\*)\\*(?!\\**\\s)(?:[^\\s\\\\*]|\\\\.|\\s(?!\\*))+\\*)|' +
    '(?<boldu>(?<!\\\\)(?<!_)(?<![A-Za-z0-9])_{2,}(?!_*\\s)(?:[^\\s\\\\_]|\\\\.|\\s(?!_{2,})|_(?!_))+_{2,}(?!_*[A-Za-z0-9]))|' +
    '(?<italicu>(?<!\\\\)(?<!_)(?<![A-Za-z0-9])_(?!_*\\s)(?:[^\\s\\\\_]|\\\\.|\\s(?!_))+_(?!_*[A-Za-z0-9]))|' +
    '(?<strike>(?<!\\\\)(?<!~)(?<strikeT>~~?)(?!~)(?!\\s)' +
        '(?:[^\\s\\\\~]|\\\\.|\\s(?!\\k<strikeT>(?!~))|(?!\\k<strikeT>(?!~))~+(?!~))+\\k<strikeT>(?!~))|' +
    '(?<code>(?<!\\\\)(?<!`)(?<codeT>`+)(?!`)(?:[^`]|(?!\\k<codeT>(?!`))`+(?!`))*\\k<codeT>(?!`))',
    'g'
);

// Link reference definition - unlike an inline link, the destination must be non-empty
// and may contain ")"
const rLinkDef = new RegExp(`^ {0,3}\\[${rLinkText}\\]:[ \\r\\n]*${rLinkHref.replace(')])*)', '])+)')}(\\r?\\n|$)`);

// Whitespace runs (collapsed in link reference keys and image alt text)
const rLinkRefSpace = /\s+/g;

// A code span with both a leading and a trailing space (one of each is stripped)
const rCodeSpaces = /^ (.+) $/;

// Newlines within a code span (replaced with spaces)
const rCodeNewlines = /\r?\n/g;


// Helper function to translate markdown paragraph text to a markdown paragraph span model array
function paragraphSpans(text, linkRefs) {
    const spans = [];
    let ixText = 0;

    // Walk the span matches in order, emitting the plain text that lies between them
    for (const match of text.matchAll(rSpans)) {
        const ixMatch = match.index;
        if (ixMatch > ixText) {
            spans.push({'text': removeEscapes(text.slice(ixText, ixMatch))});
        }
        const span = paragraphMatchSpan(match, linkRefs);
        if (span !== null) {
            spans.push(span);
        }
        ixText = ixMatch + match[0].length;
    }

    // Whatever follows the final match is plain text
    if (ixText < text.length) {
        spans.push({'text': removeEscapes(text.slice(ixText))});
    }

    return spans;
}


// Helper function to create the span model for a single span regex match (null if no group matched)
function paragraphMatchSpan(match, linkRefs) {
    const {groups} = match;
    const [fullText] = match;

    // Line break
    if (groups.br !== undefined) {
        return {'br': 1};
    }

    // Link with inline image
    if (groups.linkImg !== undefined) {
        const [imageAlt, imageSrc, imageTitle] = getLinkText(groups, 'linkImg');
        const [, href, title] = getLinkText(groups, 'linkImgLink');
        const image = createImageSpan(imageSrc, imageAlt, imageTitle);
        return createLinkSpan(href, [image], title);
    }

    // Link with inline image reference
    if (groups.linkImgRef !== undefined) {
        const [, href, title] = getLinkText(groups, 'linkImgRefLink');
        const imageRef = createImageRefSpan(
            groups.linkImgRefText, groups.linkImgRefImgText, groups.linkImgRefFull, linkRefs, true
        );
        return createLinkSpan(href, [imageRef], title);
    }

    // Link reference with inline image
    if (groups.linkRefImg !== undefined) {
        const [imageAlt, imageSrc, imageTitle] = getLinkText(groups, 'linkRefImg');
        const image = createImageSpan(imageSrc, imageAlt, imageTitle);
        return createLinkRefSpan(groups.linkRefImgLinkText, [image], fullText, linkRefs, true);
    }

    // Link reference with inline image reference
    if (groups.linkRefImgRef !== undefined) {
        const imageRef = createImageRefSpan(
            groups.linkRefImgRefText, groups.linkRefImgRefImgText, groups.linkRefImgRefFull, linkRefs, true
        );
        return createLinkRefSpan(groups.linkRefImgRefLinkText, [imageRef], fullText, linkRefs, true);
    }

    // Link or image - a leading "!" makes it an image
    if (groups.link !== undefined) {
        const [linkText, linkHref, linkTitle] = getLinkText(groups, 'link');
        return groups.link.startsWith('!')
            ? createImageSpan(linkHref, linkText, linkTitle)
            : createLinkSpan(linkHref, linkText, linkTitle, linkRefs);
    }

    // Link reference or image reference
    if (groups.linkRef !== undefined) {
        const otherText = groups.linkRefOtherText ?? null;
        const createRefSpan = (groups.linkRef.startsWith('!') ? createImageRefSpan : createLinkRefSpan);
        return createRefSpan(groups.linkRefText, otherText, fullText, linkRefs);
    }

    // Link (alternate syntax) - an email address gets a "mailto:" href
    if (groups.linkAlt !== undefined) {
        const address = groups.linkAlt.slice(1, -1);
        const href = (groups.linkAltScheme.endsWith('@') ? `mailto:${address}` : address);
        return {'link': {'href': href, 'spans': [{'text': href}]}};
    }

    // Bold style
    const bold = groups.bold ?? groups.boldu;
    if (bold !== undefined) {
        return {'style': {'style': 'bold', 'spans': paragraphSpans(bold.slice(2, -2), linkRefs)}};
    }

    // Italic style
    const italic = groups.italic ?? groups.italicu;
    if (italic !== undefined) {
        return {'style': {'style': 'italic', 'spans': paragraphSpans(italic.slice(1, -1), linkRefs)}};
    }

    // Strikethrough style - the delimiter is one or two tildes
    if (groups.strike !== undefined) {
        const delimLength = groups.strikeT.length;
        const strikeText = groups.strike.slice(delimLength, groups.strike.length - delimLength);
        return {'style': {'style': 'strikethrough', 'spans': paragraphSpans(strikeText, linkRefs)}};
    }

    // Code - newlines become spaces and one leading/trailing space pair is stripped
    if (groups.code !== undefined) {
        const delimLength = groups.codeT.length;
        const codeText = groups.code.slice(delimLength, groups.code.length - delimLength);
        return {'code': codeText.replaceAll(rCodeNewlines, ' ').replace(rCodeSpaces, '$1')};
    }

    return null;
}


// Helper function to get a link/image span's [text, href, title] from the match groups
// named "<prefix>Text", "<prefix>Href" and "<prefix>Title". The text and title are null
// when their group did not match.
function getLinkText(matchGroups, prefix) {
    const text = matchGroups[`${prefix}Text`] ?? null;

    // Strip the angle brackets from a "<...>" href
    const hrefRaw = matchGroups[`${prefix}Href`];
    const href = removeEscapes(hrefRaw.startsWith('<') ? hrefRaw.slice(1, -1) : hrefRaw, true);

    // Strip the title's enclosing delimiters
    const titleRaw = matchGroups[`${prefix}Title`] ?? null;
    const title = (titleRaw === null ? null : removeEscapes(titleRaw.slice(1, -1), true));

    return [text, href, title];
}


// Helper function to clean up an image span's alt text - escapes are removed and each
// whitespace run is replaced with a single space
function getImageAltText(text) {
    const unescaped = removeEscapes(text, true);
    return unescaped.replaceAll(rLinkRefSpace, ' ');
}


// Helper function to get a link reference key - the trimmed, whitespace-collapsed,
// lower-cased reference text
function getLinkRefKey(text) {
    const trimmed = text.trim();
    const collapsed = trimmed.replaceAll(rLinkRefSpace, ' ');
    return collapsed.toLowerCase();
}


// Helper function to create a link span. The "text" argument is either an array of
// already-built child spans (used as-is) or the link text to parse into spans.
function createLinkSpan(href, text, title, linkRefs) {
    const spans = (Array.isArray(text) ? text : paragraphSpans(text, linkRefs));
    const link = {'href': href, 'spans': spans};
    if (title !== null) {
        link.title = title;
    }
    return {'link': link};
}


// Helper function to create an image span. The title is omitted when it is null.
function createImageSpan(src, alt, title) {
    const image = {'src': src, 'alt': getImageAltText(alt)};
    if (title !== null) {
        image.title = title;
    }
    return {'image': image};
}


// Helper function to create a link reference span. The span initially holds its fallback
// spans; a pending entry is appended to "linkRefs.links" so that the link can be resolved later.
function createLinkRefSpan(refText, optText, fullText, linkRefs, textFallback = false) {
    // Build the fallback first - parsing it may record nested link references
    const fallbackSpans = createFallbackSpan(fullText, linkRefs, textFallback);
    const linkRefSpan = {'linkRef': {'spans': fallbackSpans}};

    // The link's child spans are either provided or parsed from the link text
    const refKey = getLinkRefKey(refText);
    const spans = (Array.isArray(optText) ? optText : paragraphSpans(optText ?? refText, linkRefs));
    const linkSpan = {'link': {'spans': spans}};

    linkRefs.links.push({'refKey': refKey, 'linkSpan': linkSpan, linkRefSpan});
    return linkRefSpan;
}


// Helper function to create an image reference span. The span initially holds its fallback
// spans; a pending entry is appended to "linkRefs.links" so that the image can be resolved later.
function createImageRefSpan(refText, optText, fullText, linkRefs, textFallback = false) {
    // Build the fallback first - parsing it may record nested link references
    const fallbackSpans = createFallbackSpan(fullText, linkRefs, textFallback);
    const linkRefSpan = {'linkRef': {'spans': fallbackSpans}};

    // The alt text is the optional text, if any, otherwise the reference text
    const refKey = getLinkRefKey(refText);
    const imageSpan = {'image': {'alt': getImageAltText(optText ?? refText)}};

    linkRefs.links.push({'refKey': refKey, 'imageSpan': imageSpan, linkRefSpan});
    return linkRefSpan;
}


// Helper function to create a link/image reference span's fallback spans - the spans
// used when the reference is not resolved
function createFallbackSpan(text, linkRefs, textFallback) {
    if (!textFallback) {
        // Escape the final character and parse the result as paragraph text
        const ixLast = text.length - 1;
        const escapedText = `${text.slice(0, ixLast)}\\${text.slice(ixLast)}`;
        return paragraphSpans(escapedText, linkRefs);
    }

    // Plain text fallback
    return [{'text': text}];
}


// Helper function to remove span text escapes and replace character references.
// When "href" is true, only escaped punctuation characters are unescaped.
function removeEscapes(text, href = false) {
    return text.replace(href ? rEscapeHref : rEscape, '$1').
        replace(rEntityRef, (match, entity) => (
            // Own-property check so that names like "constructor" are not treated as entities
            Object.prototype.hasOwnProperty.call(entityRefs, entity) ? entityRefs[entity] : match
        )).
        replace(rDecimalRef, (match, decimal) => codePointToString(parseInt(decimal, 10))).
        replace(rHexRef, (match, hex) => codePointToString(parseInt(hex, 16)));
}


// Helper function to convert a numeric character reference's code point to a string.
// Zero, surrogate, and out-of-range code points become the replacement character (U+FFFD).
function codePointToString(codePoint) {
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    if (codePoint <= 0 || codePoint > 0x10ffff || isSurrogate) {
        return '\ufffd';
    }

    // Unlike String.fromCharCode, this handles code points above U+FFFF
    return String.fromCodePoint(codePoint);
}


// Escape and entity regex
const rEscape = /\\([!-~])/g;
const rEscapeHref = /\\([!-/:-@[-`{-~])/g;
const rEntityRef = /&([A-Za-z]+[0-9]*);/g;
const rDecimalRef = /&#([0-9]{1,7});/g;
const rHexRef = /&#[Xx]([A-Fa-f0-9]{1,6});/g;

// The supported named character references
const entityRefs = {
    'acute': '\xb4',
    'amp': '\x26',
    'apos': '\x27',
    'bdquo': '\u201e',
    'brvbar': '\xa6',
    'cedil': '\xb8',
    'cent': '\xa2',
    'copy': '\xa9',
    'curren': '\xa4',
    'dagger': '\u2020',
    'Dagger': '\u2021',
    'deg': '\xb0',
    'divide': '\xf7',
    'emsp': '\u2003',
    'ensp': '\u2002',
    'euro': '\u20ac',
    'frac12': '\xbd',
    'frac14': '\xbc',
    'frac34': '\xbe',
    'gt': '\x3e',
    'hellip': '\u2026',
    'iexcl': '\xa1',
    'iquest': '\xbf',
    'laquo': '\xab',
    'ldquo': '\u201c',
    'lrm': '\u200e',
    'lsaquo': '\u2039',
    'lsquo': '\u2018',
    'lt': '\x3c',
    'macr': '\xaf',
    'mdash': '\u2014',
    'micro': '\xb5',
    'middot': '\xb7',
    'nbsp': '\xa0',
    'ndash': '\u2013',
    'not': '\xac',
    'ordf': '\xaa',
    'ordm': '\xba',
    'para': '\xb6',
    'permil': '\u2030',
    'plusmn': '\xb1',
    'pound': '\xa3',
    'quot': '\x22',
    'raquo': '\xbb',
    'rdquo': '\u201d',
    'reg': '\xae',
    'rlm': '\u200f',
    'rsaquo': '\u203a',
    'rsquo': '\u2019',
    'sbquo': '\u201a',
    'sect': '\xa7',
    'shy': '\xad',
    'sup1': '\xb9',
    'sup2': '\xb2',
    'sup3': '\xb3',
    'thinsp': '\u2009',
    'times': '\xd7',
    'trade': '\u2122',
    'uml': '\xa8',
    'yen': '\xa5',
    'zwj': '\u200d',
    'zwnj': '\u200c'
};