Source: parser.js

// Licensed under the MIT License
// https://github.com/craigahobbs/schema-markdown-js/blob/main/LICENSE

/** @module lib/parser */

import {validateTypeModelErrors} from './schemaUtil.js';


// The set of built-in type names
const builtinTypes = new Set([
    'any',
    'bool',
    'date',
    'datetime',
    'float',
    'int',
    'string',
    'uuid'
]);

// Deprecated built-in type names, each mapped to the built-in type name used in its place
const builtinDeprecated = {
    'object': 'any'
};


// Schema Markdown regex
//
// The "rPart*" constants are regex source strings; they are composed into the compiled "r*" regular
// expressions that the parser matches against each logical line.

// An identifier: a letter followed by any number of word characters
const rPartId = '(?:[A-Za-z]\\w*)';

// A single attribute, with named groups: "nullable", a numeric comparison (e.g. "> 0"), or a length
// comparison (e.g. "len <= 10")
const rPartAttrGroup =
      '(?:(?<nullable>nullable)' +
      '|(?<op><=|<|>|>=|==)\\s*(?<opnum>-?\\d+(?:\\.\\d+)?)' +
      '|(?<ltype>len)\\s*(?<lop><=|<|>|>=|==)\\s*(?<lopnum>\\d+))';

// The same attribute pattern with every named group made non-capturing, so that it can be embedded
// more than once in a larger pattern
const rPartAttr = rPartAttrGroup.replace(/\(\?<[^>]+>/g, '(?:');

// A comma-separated list of one or more attributes
const rPartAttrs = `(?:${rPartAttr}(?:\\s*,\\s*${rPartAttr})*)`;

// Matches one attribute and exposes its parts through the named groups
const rAttrGroup = new RegExp(rPartAttrGroup);

// Finds each attribute of an attribute list, together with its trailing comma separator (or the
// trailing whitespace at the end of the string)
const rFindAttrs = new RegExp(`${rPartAttr}(?:\\s*,\\s*|\\s*$)`, 'g');

// A backslash, optionally followed by whitespace, at the end of a line (line continuation)
const rLineCont = /\\\s*$/;

// A blank line, a "#-" comment (nothing captured), or a "#" comment whose text is captured as "doc"
const rComment = /^\s*(?:#-.*|#(?<doc>.*))?$/;

// "group" with an optional double-quoted group name
const rGroup = /^group(?:\s+"(?<group>.+?)")?\s*$/;

// "action <id>"
// NOTE(review): this pattern has no end anchor, so text following the id is not rejected - confirm intended
const rAction = new RegExp(`^action\\s+(?<id>${rPartId})`);

// An optional parenthesized, comma-separated list of base ids
const rPartBaseIds = `(?:\\s*\\(\\s*(?<baseIds>${rPartId}(?:\\s*,\\s*${rPartId})*)\\s*\\)\\s*)`;

// Splits the captured "baseIds" text into the individual ids
const rBaseIdsSplit = /\s*,\s*/;

// "struct|union|enum <id>" with optional base ids
const rDefinition = new RegExp(`^(?<type>struct|union|enum)\\s+(?<id>${rPartId})${rPartBaseIds}?\\s*$`);

// An indented action section keyword with optional base ids
const rSection = new RegExp(`^\\s+(?<type>path|query|input|output|errors)${rPartBaseIds}?\\s*$`);

// The indented "urls" action section keyword (takes no base ids)
const rSectionPlain = /^\s+(?<type>urls)\s*$/;

// A type expression followed by an id. The type expression is one of:
//   type(attrs)                                  - plain type ("(attrs)" is optional)
//   type(attrs)[attrs]                           - array
//   type(attrs){attrs}                           - dictionary
//   keyType(attrs) : valueType(attrs){attrs}     - dictionary with a key type
// The braces are required by the dictionary alternative. If they were optional, "key : value id"
// would match with no "dict" group and the ": value" part would be silently discarded.
const rPartTypedef =
      `(?<type>${rPartId})` +
      `(?:\\s*\\(\\s*(?<attrs>${rPartAttrs})\\s*\\))?` +
      '(?:' +
      `(?:\\s*\\[\\s*(?<array>${rPartAttrs}?)\\s*\\])?` +
      '|' +
      '(?:' +
      `\\s*:\\s*(?<dictValueType>${rPartId})` +
      `(?:\\s*\\(\\s*(?<dictValueAttrs>${rPartAttrs})\\s*\\))?` +
      ')?' +
      `(?:\\s*\\{\\s*(?<dict>${rPartAttrs}?)\\s*\\})` +
      ')' +
      `\\s+(?<id>${rPartId})`;

// "typedef <type expression> <id>"
const rTypedef = new RegExp(`^typedef\\s+${rPartTypedef}\\s*$`);

// An indented struct member: optional "optional" keyword, then a type expression and the member id
const rMember = new RegExp(`^\\s+(?<optional>optional\\s+)?${rPartTypedef}\\s*$`);

// An indented enum value written as an identifier
const rValue = new RegExp(`^\\s+(?<id>${rPartId})\\s*$`);

// An indented enum value written as a double-quoted string
const rValueQuoted = /^\s+"(?<id>.*?)"\s*$/;

// An indented URL: a method (letters, or "*") and an optional path that starts with "/"
// NOTE(review): this pattern has no end anchor, so text following the method/path is not rejected - confirm intended
const rURL = /^\s+(?<method>[A-Za-z]+|\*)(?:\s+(?<path>\/\S*))?/;

// Line separator: LF or CRLF
const rLineSplit = /\r?\n/;


/**
 * Parse Schema Markdown from a string or an iterable of strings
 *
 * The parsed definitions are added to the "types" option object, which is also the return value.
 * All syntax errors (and, if requested, all type model validation errors) are collected and
 * reported together in a single exception.
 *
 * @param {string|string[]} text - The Schema Markdown text
 * @param {Object} [options = {}] - The options object
 * @param {Object} [options.types={}] - The [type model]{@link https://craigahobbs.github.io/schema-markdown-doc/doc/#var.vName='Types'}
 * @param {string} [options.filename=''] - The name of file being parsed (for error messages)
 * @param {boolean} [options.validate=true] - If true, validate after parsing
 * @returns {Object} The [type model]{@link https://craigahobbs.github.io/schema-markdown-doc/doc/#var.vName='Types'}
 * @throws [SchemaMarkdownParserError]{@link module:lib/parser.SchemaMarkdownParserError}
 */
export function parseSchemaMarkdown(text, {types = {}, filename = '', validate = true} = {}) {
    // Own-property test for the name-keyed objects. The "in" operator also reports names inherited
    // from Object.prototype ("constructor", "toString", ...), all of which are valid identifiers.
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

    // Current parser state
    const errorMap = {};
    const filepos = Object.create(null);
    let action = null;
    let urls = null;
    let userType = null;
    let doc = [];
    let docGroup = null;
    let linenum = 0;

    // Helper function to add an error message - identical messages are recorded only once
    const addError = (msg, errorFilename, errorLinenum) => {
        const errorMsg = `${errorFilename}:${errorLinenum}: error: ${msg}`;
        errorMap[errorMsg] = [errorFilename, errorLinenum, errorMsg];
    };

    // Helper function to get (and reset) the pending documentation strings - null if there are none
    const getDoc = () => {
        let result = null;
        if (doc.length) {
            result = doc;
            doc = [];
        }
        return result;
    };

    // Line-split all script text
    const lines = [];
    if (typeof text === 'string') {
        lines.push(...text.split(rLineSplit));
    } else {
        for (const textPart of text) {
            lines.push(...textPart.split(rLineSplit));
        }
    }

    // The extra empty line flushes a line continuation that ends the text
    lines.push('');

    // Process each line
    const lineContinuation = [];
    for (const linePart of lines) {
        linenum += 1;

        // Line continuation?
        const linePartNoContinuation = linePart.replace(rLineCont, '');
        if (lineContinuation.length || linePartNoContinuation !== linePart) {
            lineContinuation.push(linePartNoContinuation);
        }
        if (linePartNoContinuation !== linePart) {
            continue;
        }
        let line;
        if (lineContinuation.length) {
            line = lineContinuation.join('');
            lineContinuation.length = 0;
        } else {
            line = linePart;
        }

        // Match syntax - the first matching pattern wins; some patterns apply only in the current context
        let matchName = 'comment';
        let match = line.match(rComment);
        if (match === null) {
            matchName = 'group';
            match = line.match(rGroup);
        }
        if (match === null) {
            matchName = 'action';
            match = line.match(rAction);
        }
        if (match === null) {
            matchName = 'definition';
            match = line.match(rDefinition);
        }
        if (match === null && action !== null) {
            matchName = 'section';
            match = line.match(rSection);
        }
        if (match === null && action !== null) {
            matchName = 'sectionPlain';
            match = line.match(rSectionPlain);
        }
        if (match === null && userType !== null && 'enum' in userType) {
            matchName = 'value';
            match = line.match(rValue);
            if (match === null) {
                match = line.match(rValueQuoted);
            }
        }
        if (match === null && userType !== null && 'struct' in userType) {
            matchName = 'member';
            match = line.match(rMember);
        }
        if (match === null && urls !== null) {
            matchName = 'urls';
            match = line.match(rURL);
        }
        if (match === null) {
            matchName = 'typedef';
            match = line.match(rTypedef);
        }
        if (match === null) {
            matchName = null;
        }

        // Comment?
        if (matchName === 'comment') {
            // Only "#" comments have doc text; a single leading space is removed
            const docString = match.groups.doc;
            if (typeof docString !== 'undefined') {
                doc.push(!docString.startsWith(' ') ? docString : docString.slice(1));
            }

        // Documentation group?
        } else if (matchName === 'group') {
            // A bare "group" clears the current documentation group
            docGroup = match.groups.group;
            if (typeof docGroup !== 'undefined') {
                docGroup = docGroup.trim();
            } else {
                docGroup = null;
            }

        // Action?
        } else if (matchName === 'action') {
            const actionId = match.groups.id;

            // Action already defined?
            if (hasOwn(types, actionId)) {
                addError(`Redefinition of action '${actionId}'`, filename, linenum);
            }

            // Clear parser state
            urls = null;
            userType = null;
            const actionDoc = getDoc();

            // Create the new action
            action = {'name': actionId};
            types[actionId] = {'action': action};
            if (actionDoc !== null) {
                action.doc = actionDoc;
            }
            if (docGroup !== null) {
                action.docGroup = docGroup;
            }

        // Definition?
        } else if (matchName === 'definition') {
            const definitionString = match.groups.type;
            const definitionId = match.groups.id;
            const definitionBaseIds = match.groups.baseIds;

            // Type already defined?
            if (builtinTypes.has(definitionId) || hasOwn(builtinDeprecated, definitionId) || hasOwn(types, definitionId)) {
                addError(`Redefinition of type '${definitionId}'`, filename, linenum);
            }

            // Clear parser state
            action = null;
            urls = null;
            const definitionDoc = getDoc();

            // Struct definition
            if (definitionString === 'struct' || definitionString === 'union') {
                // Create the new struct type
                const struct = {'name': definitionId};
                userType = {'struct': struct};
                types[definitionId] = userType;
                if (definitionDoc !== null) {
                    struct.doc = definitionDoc;
                }
                if (docGroup !== null) {
                    struct.docGroup = docGroup;
                }
                if (definitionString === 'union') {
                    struct.union = true;
                }
                if (typeof definitionBaseIds !== 'undefined') {
                    struct.bases = definitionBaseIds.split(rBaseIdsSplit);
                }

            // Enum definition
            } else {
                // The only remaining definition keyword is "enum" - create the new enum type
                const enum_ = {'name': definitionId};
                userType = {'enum': enum_};
                types[definitionId] = userType;
                if (definitionDoc !== null) {
                    enum_.doc = definitionDoc;
                }
                if (docGroup !== null) {
                    enum_.docGroup = docGroup;
                }
                if (typeof definitionBaseIds !== 'undefined') {
                    enum_.bases = definitionBaseIds.split(rBaseIdsSplit);
                }
            }

            // Record the definition's line number
            filepos[definitionId] = linenum;

        // Action section?
        } else if (matchName === 'section') {
            const sectionString = match.groups.type;
            const sectionBaseIds = match.groups.baseIds;

            // Action section redefinition?
            if (sectionString in action) {
                addError(`Redefinition of action ${sectionString}`, filename, linenum);
            }

            // Clear parser state
            urls = null;

            // Set the action section type - the section's type is named "<action>_<section>"
            const sectionTypeName = `${action.name}_${sectionString}`;
            action[sectionString] = sectionTypeName;
            if (sectionString === 'errors') {
                const enum_ = {'name': sectionTypeName};
                userType = {'enum': enum_};
                types[sectionTypeName] = userType;
                if (typeof sectionBaseIds !== 'undefined') {
                    enum_.bases = sectionBaseIds.split(rBaseIdsSplit);
                }
            } else {
                const struct = {'name': sectionTypeName};
                userType = {'struct': struct};
                types[sectionTypeName] = userType;
                if (typeof sectionBaseIds !== 'undefined') {
                    struct.bases = sectionBaseIds.split(rBaseIdsSplit);
                }
            }

            // Record the definition's line number
            filepos[sectionTypeName] = linenum;

        // Plain action section?
        } else if (matchName === 'sectionPlain') {
            const sectionString = match.groups.type;

            // Action section redefinition?
            if (sectionString in action) {
                addError(`Redefinition of action ${sectionString}`, filename, linenum);
            }

            // Clear parser state
            userType = null;

            // Update the parser state
            urls = [];

        // Enum value?
        } else if (matchName === 'value') {
            const valueString = match.groups.id;

            // Add the enum value
            const enum_ = userType.enum;
            if (!('values' in enum_)) {
                enum_.values = [];
            }
            const enumValue = {'name': valueString};
            enum_.values.push(enumValue);
            const enumValueDoc = getDoc();
            if (enumValueDoc !== null) {
                enumValue.doc = enumValueDoc;
            }

            // Record the definition's line number
            filepos[`${enum_.name}.${valueString}`] = linenum;

        // Struct member?
        } else if (matchName === 'member') {
            const optional = typeof match.groups.optional !== 'undefined';
            const memberName = match.groups.id;

            // Add the member
            const {struct} = userType;
            if (!('members' in struct)) {
                struct.members = [];
            }
            const [memberType, memberAttr] = parseTypedef(match);
            const memberDoc = getDoc();
            const member = {
                'name': memberName,
                'type': memberType
            };
            struct.members.push(member);
            if (memberAttr !== null) {
                member.attr = memberAttr;
            }
            if (memberDoc !== null) {
                member.doc = memberDoc;
            }
            if (optional) {
                member.optional = true;
            }

            // Record the definition's line number
            filepos[`${struct.name}.${memberName}`] = linenum;

        // URL?
        } else if (matchName === 'urls') {
            const {method, path} = match.groups;

            // Create the action URL object - the "*" method and a missing path are left unset
            const actionUrl = {};
            if (method !== '*') {
                actionUrl.method = method;
            }
            if (typeof path !== 'undefined') {
                actionUrl.path = path;
            }

            // Duplicate URL?
            if (urls.some((url) => url.method === actionUrl.method && url.path === actionUrl.path)) {
                const paddedPath = 'path' in actionUrl ? ` ${actionUrl.path}` : '';
                addError(`Duplicate URL: ${method}${paddedPath}`, filename, linenum);
            }

            // Add the URL
            if (!('urls' in action)) {
                action.urls = urls;
            }
            urls.push(actionUrl);

        // Typedef?
        } else if (matchName === 'typedef') {
            const definitionId = match.groups.id;

            // Type already defined?
            if (builtinTypes.has(definitionId) || hasOwn(builtinDeprecated, definitionId) || hasOwn(types, definitionId)) {
                addError(`Redefinition of type '${definitionId}'`, filename, linenum);
            }

            // Clear parser state
            action = null;
            urls = null;
            userType = null;
            const typedefDoc = getDoc();

            // Create the typedef
            const [typedefType, typedefAttr] = parseTypedef(match);
            const typedef = {
                'name': definitionId,
                'type': typedefType
            };
            types[definitionId] = {'typedef': typedef};
            if (typedefAttr !== null) {
                typedef.attr = typedefAttr;
            }
            if (typedefDoc !== null) {
                typedef.doc = typedefDoc;
            }
            if (docGroup !== null) {
                typedef.docGroup = docGroup;
            }

            // Record the definition's line number
            filepos[definitionId] = linenum;

        // Unrecognized line syntax
        } else {
            addError('Syntax error', filename, linenum);
        }
    }

    // Validate the type model, if requested
    if (validate) {
        for (const [typeName, memberName, errorMsg] of validateTypeModelErrors(types)) {
            // Report at the member's line, else the type's line, else line 1 of an unnamed file
            let errorFilename = filename;
            let errorLinenum = null;
            if (memberName !== null) {
                errorLinenum = filepos[`${typeName}.${memberName}`] ?? null;
            }
            if (errorLinenum === null) {
                errorLinenum = filepos[typeName] ?? null;
            }
            if (errorLinenum === null) {
                errorFilename = '';
                errorLinenum = 1;
            }
            addError(errorMsg, errorFilename, errorLinenum);
        }
    }

    // Raise a parser exception if there are any errors - sorted by filename, line number, and message
    const errors = Array.from(Object.values(errorMap)).sort(compareTuple).map(([,, msg]) => msg);
    if (errors.length) {
        throw new SchemaMarkdownParserError(errors);
    }

    return types;
}


// Array sort comparator - compares two tuples (arrays) element by element, in index order, using the
// "<" and ">" operators. Returns -1 or 1 at the first element that differs, or 0 if none differ.
function compareTuple(v1, v2) {
    for (const [idx, left] of v1.entries()) {
        const right = v2[idx];
        if (left < right) {
            return -1;
        }
        if (left > right) {
            return 1;
        }
    }
    return 0;
}


// Helper function to parse a typedef - returns a two-element array of the type model and the
// attributes model (null if there are no attributes). The argument is a regex match whose named
// groups are "type", "attrs", "array", "dict", "dictValueType", and "dictValueAttrs".
function parseTypedef(matchTypedef) {
    const {type, attrs, array, dict, dictValueType, dictValueAttrs} = matchTypedef.groups;

    // Array type? The "array" group holds the attributes of the array itself, while "type" and
    // "attrs" describe the array's values.
    if (typeof array !== 'undefined') {
        const arrayModel = {'type': createType(type)};
        const elementAttr = parseAttr(attrs);
        if (elementAttr !== null) {
            arrayModel.attr = elementAttr;
        }
        return [{'array': arrayModel}, parseAttr(array)];
    }

    // Dictionary type? The "dict" group holds the attributes of the dictionary itself.
    if (typeof dict !== 'undefined') {
        // With a "dictValueType", "type"/"attrs" describe the keys; otherwise they describe the values
        const hasKeyType = typeof dictValueType !== 'undefined';
        const dictModel = hasKeyType
            ? {'type': createType(dictValueType), 'keyType': createType(type)}
            : {'type': createType(type)};
        const valueAttr = parseAttr(hasKeyType ? dictValueAttrs : attrs);
        if (valueAttr !== null) {
            dictModel.attr = valueAttr;
        }
        if (hasKeyType) {
            const keyAttr = parseAttr(attrs);
            if (keyAttr !== null) {
                dictModel.keyAttr = keyAttr;
            }
        }
        return [{'dict': dictModel}, parseAttr(dict)];
    }

    // Non-container type...
    return [createType(type), parseAttr(attrs)];
}


// Helper function to create a type model from a type name - returns {'builtin': name} for a built-in
// type (a deprecated built-in name is replaced by its current name), otherwise {'user': name}
function createType(typeName) {
    // Own-property test: the "in" operator would also match names inherited from Object.prototype
    // (e.g. "constructor", "toString"), turning a user type reference into a bogus built-in
    if (Object.prototype.hasOwnProperty.call(builtinDeprecated, typeName)) {
        return {'builtin': builtinDeprecated[typeName]};
    }
    if (builtinTypes.has(typeName)) {
        return {'builtin': typeName};
    }
    return {'user': typeName};
}


// Attributes-model keys for the numeric comparison operators (any other operator maps to "eq")
const attrValueKeys = new Map([['<', 'lt'], ['<=', 'lte'], ['>', 'gt'], ['>=', 'gte']]);

// Attributes-model keys for the "len" comparison operators (any other operator maps to "lenEq")
const attrLengthKeys = new Map([['<', 'lenLT'], ['<=', 'lenLTE'], ['>', 'lenGT'], ['>=', 'lenGTE']]);


// Helper function to parse an attributes string - returns an attributes model, or null if the
// attributes string is undefined or contains no attributes
function parseAttr(attrsString) {
    if (typeof attrsString === 'undefined') {
        return null;
    }

    let model = null;
    for (const [attrString] of attrsString.matchAll(rFindAttrs)) {
        // The model is only created once an attribute is found
        if (model === null) {
            model = {};
        }

        // Re-match the single attribute to get its named groups
        const {nullable, op, opnum, lop, lopnum} = attrString.match(rAttrGroup).groups;
        if (typeof nullable !== 'undefined') {
            model.nullable = true;
        } else if (typeof op !== 'undefined') {
            // Numeric comparison - the number may be fractional
            model[attrValueKeys.get(op) ?? 'eq'] = parseFloat(opnum);
        } else {
            // Length comparison - the number is a non-negative integer
            model[attrLengthKeys.get(lop) ?? 'lenEq'] = parseInt(lopnum, 10);
        }
    }
    return model;
}


/**
 * The error thrown when Schema Markdown text fails to parse (or validate). The error message is
 * the error strings joined with newlines.
 *
 * @extends {Error}
 * @property {string[]} errors - The list of error strings
 */
export class SchemaMarkdownParserError extends Error {
    /**
     * Create a Schema Markdown parser error instance
     *
     * @param {string[]} errors - The list of error strings
     */
    constructor(errors) {
        // One error string per line of the message
        const message = errors.join('\n');
        super(message);

        // Report the name of the constructing class
        const {name} = this.constructor;
        this.name = name;
        this.errors = errors;
    }
}