const path = require('path'); const { PassThrough, Transform } = require('stream'); const { BuildType } = require('../utils'); const { lintTransformedFile } = require('./utils'); const hasKey = (obj, key) => Reflect.hasOwnProperty.call(obj, key); module.exports = { createRemoveFencedCodeTransform, removeFencedCode, }; class RemoveFencedCodeTransform extends Transform { /** * A transform stream that calls {@link removeFencedCode} on the complete * string contents of the file read by Browserify. * * Optionally lints the file if it was modified. * * @param {string} filePath - The path to the file being transformed. * @param {string} buildType - The type of the current build process. * @param {boolean} shouldLintTransformedFiles - Whether the file should be * linted if modified by the transform. */ constructor(filePath, buildType, shouldLintTransformedFiles) { super(); this.filePath = filePath; this.buildType = buildType; this.shouldLintTransformedFiles = shouldLintTransformedFiles; this._fileBuffers = []; } // This function is called whenever data is written to the stream. // It concatenates all buffers for the current file into a single buffer. _transform(buffer, _encoding, next) { this._fileBuffers.push(buffer); next(); } // "flush" is called when all data has been written to the // stream, immediately before the "end" event is emitted. // It applies the transform to the concatenated file contents. _flush(end) { let fileContent, didModify; try { [fileContent, didModify] = removeFencedCode( this.filePath, this.buildType, Buffer.concat(this._fileBuffers).toString('utf8'), ); } catch (error) { return end(error); } const pushAndEnd = () => { this.push(fileContent); end(); }; if (this.shouldLintTransformedFiles && didModify) { return lintTransformedFile(fileContent, this.filePath) .then(pushAndEnd) .catch((error) => end(error)); } return pushAndEnd(); } } /** * A factory for a Browserify transform that removes fenced code from all * JavaScript source files. The transform is applied to files with the following * extensions: * - `.js` * - `.cjs` * - `.mjs` * * For details on how the transform mutates source files, see * {@link removeFencedCode} and the documentation. * * If specified (and by default), the transform will call ESLint on the text * contents of any file that it modifies. The transform will error if such a * file is ignored by ESLint, since linting is our first line of defense against * making un-syntactic modifications to files using code fences. * * @param {string} buildType - The type of the current build. * @param {boolean} shouldLintTransformedFiles - Whether to lint transformed files. * @returns {(filePath: string) => Transform} The transform function. */ function createRemoveFencedCodeTransform( buildType, shouldLintTransformedFiles = true, ) { if (!hasKey(BuildType, buildType)) { throw new Error( `Code fencing transform received unrecognized build type "${buildType}".`, ); } // Browserify transforms are functions that receive a file name and return a // duplex stream. The stream receives the file contents piecemeal in the form // of Buffers. // To apply our code fencing transform, we concatenate all buffers and convert // them to a single string, then apply the actual transform function on that // string. /** * Returns a transform stream that removes fenced code from JavaScript files. For non-JavaScript * files, a pass-through stream is returned. * * @param filePath - The file path to transform. * @returns {Transform} The transform stream. */ return function removeFencedCodeTransform(filePath) { if (!['.js', '.cjs', '.mjs'].includes(path.extname(filePath))) { return new PassThrough(); } return new RemoveFencedCodeTransform( filePath, buildType, shouldLintTransformedFiles, ); }; } const DirectiveTerminuses = { BEGIN: 'BEGIN', END: 'END', }; const DirectiveCommands = { ONLY_INCLUDE_IN: 'ONLY_INCLUDE_IN', }; const CommandValidators = { [DirectiveCommands.ONLY_INCLUDE_IN]: (params, filePath) => { if (!params || params.length === 0) { throw new Error( getInvalidParamsMessage( filePath, DirectiveCommands.ONLY_INCLUDE_IN, `No params specified.`, ), ); } params.forEach((param) => { if (!hasKey(BuildType, param)) { throw new Error( getInvalidParamsMessage( filePath, DirectiveCommands.ONLY_INCLUDE_IN, `"${param}" is not a valid build type.`, ), ); } }); }, }; // Matches lines starting with "///:", and any preceding whitespace, except // newlines. We except newlines to avoid eating blank lines preceding a fenced // line. // Double-negative RegEx credit: https://stackoverflow.com/a/3469155 const linesWithFenceRegex = /^[^\S\r\n]*\/\/\/:.*$/gmu; // Matches the first "///:" in a string, and any preceding whitespace const fenceSentinelRegex = /^\s*\/\/\/:/u; // Breaks a fence directive into its constituent components // At this stage of parsing, we are looking for one of: // - TERMINUS:COMMAND(PARAMS) // - TERMINUS:COMMAND const directiveParsingRegex = /^([A-Z]+):([A-Z_]+)(?:\(((?:\w+,)*\w+)\))?$/u; /** * Removes fenced code from the given JavaScript source string. "Fenced code" * includes the entire fence lines, including their trailing newlines, and the * lines that they surround. * * A valid fence consists of two well-formed fence lines, separated by one or * more lines that should be excluded. The first line must contain a `BEGIN` * directive, and the second most contain an `END` directive. Both directives * must specify the same command. * * Here's an example of a valid fence: * * ```javascript * ///: BEGIN:ONLY_INCLUDE_IN(flask) * console.log('I am Flask.'); * ///: END:ONLY_INCLUDE_IN * ``` * * For details, please see the documentation. * * @param {string} filePath - The path to the file being transformed. * @param {string} typeOfCurrentBuild - The type of the current build. * @param {string} fileContent - The contents of the file being transformed. * @returns {[string, modified]} A tuple of the post-transform file contents and * a boolean indicating whether they were modified. */ function removeFencedCode(filePath, typeOfCurrentBuild, fileContent) { const matchedLines = [...fileContent.matchAll(linesWithFenceRegex)]; // If we didn't match any lines, return the unmodified file contents. if (matchedLines.length === 0) { return [fileContent, false]; } // Parse fence lines const parsedDirectives = matchedLines.map((matchArray) => { const line = matchArray[0]; /* istanbul ignore next: should be impossible */ if (!fenceSentinelRegex.test(line)) { throw new Error( getInvalidFenceLineMessage( filePath, line, `Fence sentinel may only appear at the start of a line, optionally preceded by whitespace.`, ), ); } // Store the start and end indices of each line // Increment the end index by 1 to including the trailing newline when // performing string operations. const indices = [matchArray.index, matchArray.index + line.length + 1]; const lineWithoutSentinel = line.replace(fenceSentinelRegex, ''); if (!/^ \w\w+/u.test(lineWithoutSentinel)) { throw new Error( getInvalidFenceLineMessage( filePath, line, `Fence sentinel must be followed by a single space and an alphabetical string of two or more characters.`, ), ); } const directiveMatches = lineWithoutSentinel .trim() .match(directiveParsingRegex); if (!directiveMatches) { throw new Error( getInvalidFenceLineMessage( filePath, line, `Failed to parse fence directive.`, ), ); } // The first element of a RegEx match array is the input const [, terminus, command, parameters] = directiveMatches; if (!hasKey(DirectiveTerminuses, terminus)) { throw new Error( getInvalidFenceLineMessage( filePath, line, `Line contains invalid directive terminus "${terminus}".`, ), ); } if (!hasKey(DirectiveCommands, command)) { throw new Error( getInvalidFenceLineMessage( filePath, line, `Line contains invalid directive command "${command}".`, ), ); } const parsed = { line, indices, terminus, command, }; if (parameters !== undefined) { parsed.parameters = parameters.split(','); } return parsed; }); if (parsedDirectives.length % 2 !== 0) { throw new Error( getInvalidFenceStructureMessage( filePath, `A valid fence consists of two fence lines, but the file contains an uneven number, "${parsedDirectives.length}", of fence lines.`, ), ); } // The below for-loop iterates over the parsed fence directives and performs // the following work: // - Ensures that the array of parsed directives consists of valid directive // pairs, as specified in the documentation. // - For each directive pair, determines whether their fenced lines should be // removed for the current build, and if so, stores the indices we will use // to splice the file content string. const splicingIndices = []; let shouldSplice = false; let currentCommand; for (let i = 0; i < parsedDirectives.length; i++) { const { line, indices, terminus, command, parameters } = parsedDirectives[ i ]; if (i % 2 === 0) { if (terminus !== DirectiveTerminuses.BEGIN) { throw new Error( getInvalidFencePairMessage( filePath, line, `The first directive of a pair must be a "BEGIN" directive.`, ), ); } currentCommand = command; // Throws an error if the command parameters are invalid CommandValidators[command](parameters, filePath); if (parameters.includes(typeOfCurrentBuild)) { shouldSplice = false; } else { shouldSplice = true; // Add start index of BEGIN directive line to splicing indices splicingIndices.push(indices[0]); } } else { if (terminus !== DirectiveTerminuses.END) { throw new Error( getInvalidFencePairMessage( filePath, line, `The second directive of a pair must be an "END" directive.`, ), ); } /* istanbul ignore next: impossible until there's more than one command */ if (command !== currentCommand) { throw new Error( getInvalidFencePairMessage( filePath, line, `Expected "END" directive to have command "${currentCommand}" but found "${command}".`, ), ); } // Forbid empty fences const { line: previousLine, indices: previousIndices } = parsedDirectives[ i - 1 ]; if (fileContent.substring(previousIndices[1], indices[0]).trim() === '') { throw new Error( `Empty fence found in file "${filePath}":\n${previousLine}\n${line}\n`, ); } if (shouldSplice) { // Add end index of END directive line to splicing indices splicingIndices.push(indices[1]); } } } // This indicates that the present build type should include all fenced code, // and so we just returned the unmodified file contents. if (splicingIndices.length === 0) { return [fileContent, false]; } /* istanbul ignore next: should be impossible */ if (splicingIndices.length % 2 !== 0) { throw new Error( `Internal error while transforming file "${filePath}":\nCollected an uneven number of splicing indices: "${splicingIndices.length}"`, ); } return [multiSplice(fileContent, splicingIndices), true]; } /** * Returns a copy of the given string, without the character ranges specified * by the splicing indices array. * * The splicing indices must be a non-empty, even-length array of non-negative * integers, specifying the character ranges to remove from the given string, as * follows: * * `[ start, end, start, end, start, end, ... ]` * * @param {string} toSplice - The string to splice. * @param {number[]} splicingIndices - Indices to splice at. * @returns {string} The spliced string. */ function multiSplice(toSplice, splicingIndices) { const retainedSubstrings = []; // Get the first part to be included // The substring() call returns an empty string if splicingIndices[0] is 0, // which is exactly what we want in that case. retainedSubstrings.push(toSplice.substring(0, splicingIndices[0])); // This loop gets us all parts of the string that should be retained, except // the first and the last. // It iterates over all "end" indices of the array except the last one, and // pushes the substring between each "end" index and the next "begin" index // to the array of retained substrings. if (splicingIndices.length > 2) { // Note the boundary index of "splicingIndices.length - 1". This loop must // not iterate over the last element of the array. for (let i = 1; i < splicingIndices.length - 1; i += 2) { retainedSubstrings.push( toSplice.substring(splicingIndices[i], splicingIndices[i + 1]), ); } } // Get the last part to be included retainedSubstrings.push( toSplice.substring(splicingIndices[splicingIndices.length - 1]), ); return retainedSubstrings.join(''); } /** * @param {string} filePath - The path to the file that caused the error. * @param {string} line - The contents of the line with the error. * @param {string} details - An explanation of the error. * @returns The error message. */ function getInvalidFenceLineMessage(filePath, line, details) { return `Invalid fence line in file "${filePath}": "${line}":\n${details}`; } /** * @param {string} filePath - The path to the file that caused the error. * @param {string} details - An explanation of the error. * @returns The error message. */ function getInvalidFenceStructureMessage(filePath, details) { return `Invalid fence structure in file "${filePath}":\n${details}`; } /** * @param {string} filePath - The path to the file that caused the error. * @param {string} line - The contents of the line with the error. * @param {string} details - An explanation of the error. * @returns The error message. */ function getInvalidFencePairMessage(filePath, line, details) { return `Invalid fence pair in file "${filePath}" due to line "${line}":\n${details}`; } /** * @param {string} filePath - The path to the file that caused the error. * @param {string} command - The command of the directive with the invalid * parameters. * @param {string} details - An explanation of the error. * @returns The error message. */ function getInvalidParamsMessage(filePath, command, details) { return `Invalid code fence parameters in file "${filePath}" for command "${command}":\n${details}`; }