Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 62 additions & 13 deletions src/core/tokenize/match.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { Token } from '../classes/token.js';
import singleton from '../prism.js';
import { tokenize } from './tokenize.js';
import { resolve } from './util.js';
import { resolve, tokenizeByNamedGroups } from './util.js';

/**
* @this {Prism}
Expand All @@ -21,7 +21,12 @@ export function _matchGrammar (text, tokenList, grammar, startNode, startPos, re

for (const token in grammar) {
const tokenValue = grammar[token];
if (!grammar.hasOwnProperty(token) || token.startsWith('$') || !tokenValue) {
if (
!grammar.hasOwnProperty(token) ||
token.startsWith('$') ||
!tokenValue ||
typeof tokenValue === 'function' // functional tokens ($inside for now) are handled on L170, and we should ignore them in all other cases
) {
continue;
}

Expand All @@ -36,9 +41,20 @@ export function _matchGrammar (text, tokenList, grammar, startNode, startPos, re
let { pattern, lookbehind = false, greedy = false, alias, inside } = patternObj;
const insideGrammar = resolve.call(prism, inside);

let flagsToAdd = '';

if (greedy && !pattern.global) {
// Without the global flag, lastIndex won't work
patternObj.pattern = pattern = RegExp(pattern.source, pattern.flags + 'g');
flagsToAdd += 'g';
}

if (pattern.source?.includes('(?<') && pattern.hasIndices === false) {
// Has named groups, we need to be able to capture their indices
flagsToAdd += 'd';
}

if (flagsToAdd) {
patternObj.pattern = pattern = RegExp(pattern.source, pattern.flags + flagsToAdd);
}

for (
Expand All @@ -63,7 +79,8 @@ export function _matchGrammar (text, tokenList, grammar, startNode, startPos, re
}

let removeCount = 1; // this is the to parameter of removeBetween
let match;
/** @type {RegExpExecArray | null} */
let match = null;

if (greedy) {
match = matchPattern(pattern, pos, text, lookbehind);
Expand Down Expand Up @@ -117,6 +134,10 @@ export function _matchGrammar (text, tokenList, grammar, startNode, startPos, re

const from = match.index;
const matchStr = match[0];

/** @type {TokenStream | string} */
let content = matchStr;

const before = str.slice(0, from);
const after = str.slice(from + matchStr.length);

Expand All @@ -134,14 +155,42 @@ export function _matchGrammar (text, tokenList, grammar, startNode, startPos, re

tokenList.removeRange(removeFrom, removeCount);

const wrapped = new Token(
token,
insideGrammar
? tokenize.call(prism, matchStr, /** @type {Grammar} */ (insideGrammar))
: matchStr,
alias,
matchStr
);
const byGroups = match.groups ? tokenizeByNamedGroups(match) : null;
if (byGroups && byGroups.length > 1) {
content = byGroups
.map(arg => {
let content = typeof arg === 'string' ? arg : arg.content;
const type = typeof arg === 'string' ? undefined : arg.type;

if (insideGrammar) {
let localInsideGrammar = type ? insideGrammar[type] : insideGrammar;

if (typeof localInsideGrammar === 'function') {
// Late resolving
localInsideGrammar = resolve.call(
prism,
localInsideGrammar(match.groups)
);
}

if (localInsideGrammar) {
// @ts-ignore
content = tokenize.call(prism, content, localInsideGrammar);
}
}

return typeof arg === 'object' && arg.type
? new Token(arg.type, content)
: content;
})
.flat(); // Flatten tokens like ['foo']
}
else if (insideGrammar) {
// @ts-ignore
content = tokenize.call(prism, content, insideGrammar);
}

const wrapped = new Token(token, content, alias, matchStr);
currentNode = tokenList.addAfter(removeFrom, wrapped);

if (after) {
Expand Down Expand Up @@ -216,7 +265,7 @@ function toGrammarToken (pattern) {

/**
* @import { Prism } from '../prism.js';
* @import { Grammar, GrammarToken, GrammarTokens, RegExpLike } from '../../types.d.ts';
* @import { Grammar, GrammarToken, GrammarTokens, TokenStream, RegExpLike } from '../../types.d.ts';
*/

/**
Expand Down
46 changes: 44 additions & 2 deletions src/core/tokenize/util.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import { camelToKebabCase } from '../../shared/util.js';
import singleton from '../prism.js';

/**
* @this {Prism}
* @param {Grammar | string | null | undefined} reference
* @returns {Grammar | undefined}
* @param {Grammar | string | Function | null | undefined} reference
* @returns {Grammar | Function | undefined}
*/
export function resolve (reference) {
const prism = this ?? singleton;
Expand All @@ -13,6 +14,11 @@ export function resolve (reference) {
ret = prism.languageRegistry.getLanguage(ret)?.resolvedGrammar;
}

if (typeof ret === 'function' && ret.length === 0) {
// Function with no arguments, resolve eagerly
ret = ret.call(prism);
}

if (typeof ret === 'object' && ret.$rest) {
const restGrammar = resolve.call(prism, ret.$rest) ?? {};
if (typeof restGrammar === 'object') {
Expand All @@ -25,6 +31,42 @@ export function resolve (reference) {
return /** @type {Grammar | undefined} */ (ret);
}

/**
 * Splits a match string into typed segments using the match's named capture
 * groups (requires the regex `d` flag so `match.indices` is populated).
 *
 * @param {RegExpExecArray} match - exec result carrying `indices.groups`
 * @returns {({type: string, content: string} | string)[]} segments in source
 *          order; text not covered by any named group is kept as plain strings
 */
export function tokenizeByNamedGroups (match) {
	const str = match[0];
	const result = [];
	let i = 0;

	const entries = Object.entries(match.indices?.groups || {})
		// Optional/alternated groups that did not participate in the match have
		// `undefined` indices; destructuring them would throw, so drop them first.
		.filter(([, indices]) => indices)
		.map(([type, [start, end]]) => ({
			type,
			// Group offsets are absolute in the haystack; rebase them onto the match.
			start: start - match.index,
			end: end - match.index,
		}))
		.sort((a, b) => a.start - b.start);

	for (let { type, start, end } of entries) {
		if (start > i) {
			// Plain text between (or before) named groups stays an untyped string.
			result.push(str.slice(i, start));
		}

		const content = str.slice(start, end);
		type = camelToKebabCase(type);
		result.push({ type, content });
		i = end;
	}

	// Trailing text after the last named group.
	if (i < str.length) {
		result.push(str.slice(i));
	}

	return result;
}

/**
* @import { Prism } from '../prism.js';
* @import { Grammar, LanguageRegistry } from '../../types.d.ts';
Expand Down
79 changes: 15 additions & 64 deletions src/languages/markdown.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,73 +99,24 @@ export default {
// ```optional language
// code block
// ```
pattern: /^```[\s\S]*?^```$/m,
greedy: true,
inside: /** @type {Grammar} */ ({
'code-block': {
pattern: /^(```.*(?:\n|\r\n?))[\s\S]+?(?=(?:\n|\r\n?)^```$)/m,
lookbehind: true,
},
'code-language': {
pattern: /^(```).+/,
lookbehind: true,
},
'punctuation': /```/,
/** @type {Grammar['$tokenize']} */
$tokenize (code, grammar, Prism) {
const tokens = Prism.tokenize(code, withoutTokenize(grammar));

/*
* Add the correct `language-xxxx` class to this code block. Keep in mind that the `code-language` token
* is optional. But the grammar is defined so that there is only one case we have to handle:
*
* token.content = [
* <span class="punctuation">```</span>,
* <span class="code-language">xxxx</span>,
* '\n', // exactly one new lines (\r or \n or \r\n)
* <span class="code-block">...</span>,
* '\n', // exactly one new lines again
* <span class="punctuation">```</span>
* ];
*/

const codeLang = tokens[1];
const codeBlock = tokens[3];

if (
typeof codeLang === 'object' &&
typeof codeBlock === 'object' &&
codeLang.type === 'code-language' &&
codeBlock.type === 'code-block'
) {
// this might be a language that Prism does not support

// do some replacements to support C++, C#, and F#
const lang = getTextContent(codeLang.content)
.replace(/\b#/g, 'sharp')
.replace(/\b\+\+/g, 'pp');
// only use the first word
const langName = /[a-z][\w-]*/i.exec(lang)?.[0].toLowerCase();
if (langName) {
codeBlock.addAlias('language-' + langName);

const grammar =
Prism.languageRegistry.getLanguage(lang)?.resolvedGrammar;
if (grammar) {
codeBlock.content = Prism.tokenize(
getTextContent(codeBlock),
grammar
);
}
else {
codeBlock.addAlias('needs-highlighting');
}
pattern:
/^```\s*(?<codeLanguage>\{[^{}]*\}|[a-z+#-]+)(?:[ \t][^\n\r]*)?(?:\n|\r\n?)(?<codeBlock>[\s\S]*?)(?:\n|\r\n?)```$/im,
inside: {
'code-block': groups => {
let lang = groups.codeLanguage;
// Extract language code from curly braces like {r pressure, echo=FALSE} → r
if (lang.startsWith('{') && lang.endsWith('}')) {
const match = lang.slice(1, -1).match(/^\s*([a-z+#-]+)/i);
if (match) {
lang = match[0];
}
}

return tokens;
// Apply transformations: c++ → cpp, c# → csharp, f# → fsharp, etc.
lang = lang.replace(/\b#/g, 'sharp').replace(/\b\+\+/g, 'pp');
return lang.toLowerCase();
},
}),
'punctuation': /```/,
},
},
],
'title': [
Expand Down
10 changes: 5 additions & 5 deletions src/shared/languages/templating.js
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,9 @@ export function templating (code, hostGrammar, templateGrammar, Prism) {
hostGrammar = resolve.call(Prism, hostGrammar);
templateGrammar = resolve.call(Prism, templateGrammar);

const { hostCode, tokenStack } = buildPlaceholders(code, templateGrammar, Prism);
const { hostCode, tokenStack } = buildPlaceholders(code, /** @type {Grammar | undefined} */ (templateGrammar), Prism);

const tokens = hostGrammar ? Prism.tokenize(hostCode, hostGrammar) : [hostCode];
const tokens = hostGrammar ? Prism.tokenize(hostCode, /** @type {Grammar} */ (hostGrammar)) : [hostCode];
insertIntoHostToken(tokens, tokenStack);
return tokens;
}
Expand All @@ -145,10 +145,10 @@ export function embeddedIn (hostGrammar) {
}

/**
* @import { Prism, Token } from '../../core.js';
* @import { TokenStream, TokenStack, Grammar, LanguageRegistry} from '../../types.d.ts';
* @import { Prism } from '../../core.js';
* @import { TokenStream, TokenStack, Grammar } from '../../types.d.ts';
*/

/**
* @typedef {Grammar | string | undefined | null} GrammarRef
* @typedef {Grammar | Function | string | undefined | null} GrammarRef
*/
10 changes: 10 additions & 0 deletions src/shared/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,13 @@ export function kebabToCamelCase (kebab) {
const [first, ...others] = kebab.split(/-/);
return first + others.map(capitalize).join('');
}

/**
 * Converts the given camel case identifier to a kebab case identifier.
 *
 * @param {string} str - camel case identifier (coerced to a string first)
 * @returns {string} the kebab case form, e.g. `fooBar` becomes `foo-bar`
 */
export function camelToKebabCase (str) {
	let kebab = '';
	for (const char of (str + '')) {
		// Only ASCII uppercase letters mark a word boundary.
		kebab += /[A-Z]/.test(char) ? '-' + char.toLowerCase() : char;
	}
	return kebab;
}
Loading
Loading