Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 62 additions & 13 deletions src/core/tokenize/match.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { Token } from '../classes/token.js';
import singleton from '../prism.js';
import { tokenize } from './tokenize.js';
import { resolve } from './util.js';
import { resolve, tokenizeByNamedGroups } from './util.js';

/**
* @this {Prism}
Expand All @@ -21,7 +21,12 @@ export function _matchGrammar (text, tokenList, grammar, startNode, startPos, re

for (const token in grammar) {
const tokenValue = grammar[token];
if (!grammar.hasOwnProperty(token) || token.startsWith('$') || !tokenValue) {
if (
!grammar.hasOwnProperty(token) ||
token.startsWith('$') ||
!tokenValue ||
typeof tokenValue === 'function' // functional tokens ($inside for now) are handled on L170, and we should ignore them in all other cases
) {
continue;
}

Expand All @@ -36,9 +41,20 @@ export function _matchGrammar (text, tokenList, grammar, startNode, startPos, re
let { pattern, lookbehind = false, greedy = false, alias, inside } = patternObj;
const insideGrammar = resolve.call(prism, inside);

let flagsToAdd = '';

if (greedy && !pattern.global) {
// Without the global flag, lastIndex won't work
patternObj.pattern = pattern = RegExp(pattern.source, pattern.flags + 'g');
flagsToAdd += 'g';
}

if (pattern.source?.includes('(?<') && pattern.hasIndices === false) {
// Has named groups, we need to be able to capture their indices
flagsToAdd += 'd';
}

if (flagsToAdd) {
patternObj.pattern = pattern = RegExp(pattern.source, pattern.flags + flagsToAdd);
}

for (
Expand All @@ -63,7 +79,8 @@ export function _matchGrammar (text, tokenList, grammar, startNode, startPos, re
}

let removeCount = 1; // this is the to parameter of removeBetween
let match;
/** @type {RegExpExecArray | null} */
let match = null;

if (greedy) {
match = matchPattern(pattern, pos, text, lookbehind);
Expand Down Expand Up @@ -117,6 +134,10 @@ export function _matchGrammar (text, tokenList, grammar, startNode, startPos, re

const from = match.index;
const matchStr = match[0];

/** @type {TokenStream | string} */
let content = matchStr;

const before = str.slice(0, from);
const after = str.slice(from + matchStr.length);

Expand All @@ -134,14 +155,42 @@ export function _matchGrammar (text, tokenList, grammar, startNode, startPos, re

tokenList.removeRange(removeFrom, removeCount);

const wrapped = new Token(
token,
insideGrammar
? tokenize.call(prism, matchStr, /** @type {Grammar} */ (insideGrammar))
: matchStr,
alias,
matchStr
);
const byGroups = match.groups ? tokenizeByNamedGroups(match) : null;
if (byGroups && byGroups.length > 1) {
content = byGroups
.map(arg => {
let content = typeof arg === 'string' ? arg : arg.content;
const type = typeof arg === 'string' ? undefined : arg.type;

if (insideGrammar) {
let localInsideGrammar = type ? insideGrammar[type] : insideGrammar;

if (typeof localInsideGrammar === 'function') {
// Late resolving
localInsideGrammar = resolve.call(
prism,
localInsideGrammar(match.groups)
);
}

if (localInsideGrammar) {
// @ts-ignore
content = tokenize.call(prism, content, localInsideGrammar);
}
}

return typeof arg === 'object' && arg.type
? new Token(arg.type, content)
: content;
})
.flat(); // Flatten tokens like ['foo']
}
else if (insideGrammar) {
// @ts-ignore
content = tokenize.call(prism, content, insideGrammar);
}

const wrapped = new Token(token, content, alias, matchStr);
currentNode = tokenList.addAfter(removeFrom, wrapped);

if (after) {
Expand Down Expand Up @@ -216,7 +265,7 @@ function toGrammarToken (pattern) {

/**
* @import { Prism } from '../prism.js';
* @import { Grammar, GrammarToken, GrammarTokens, RegExpLike } from '../../types.d.ts';
* @import { Grammar, GrammarToken, GrammarTokens, TokenStream, RegExpLike } from '../../types.d.ts';
*/

/**
Expand Down
46 changes: 44 additions & 2 deletions src/core/tokenize/util.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import { camelToKebabCase } from '../../shared/util.js';
import singleton from '../prism.js';

/**
* @this {Prism}
* @param {Grammar | string | null | undefined} reference
* @returns {Grammar | undefined}
* @param {Grammar | string | Function | null | undefined} reference
* @returns {Grammar | Function | undefined}
*/
export function resolve (reference) {
const prism = this ?? singleton;
Expand All @@ -13,6 +14,11 @@ export function resolve (reference) {
ret = prism.languageRegistry.getLanguage(ret)?.resolvedGrammar;
}

if (typeof ret === 'function' && ret.length === 0) {
// Function with no arguments, resolve eagerly
ret = ret.call(prism);
}

if (typeof ret === 'object' && ret.$rest) {
const restGrammar = resolve.call(prism, ret.$rest) ?? {};
if (typeof restGrammar === 'object') {
Expand All @@ -25,6 +31,42 @@ export function resolve (reference) {
return /** @type {Grammar | undefined} */ (ret);
}

/**
 * Splits a match string into typed segments using the match's named capture
 * groups (requires the regex `d` flag so `match.indices` is populated).
 *
 * @param {RegExpExecArray} match - exec result carrying `indices.groups`
 * @returns {({type: string, content: string} | string)[]} segments in source
 *          order; text not covered by any named group is kept as plain strings
 */
export function tokenizeByNamedGroups (match) {
	const str = match[0];
	const result = [];
	let i = 0;

	const entries = Object.entries(match.indices?.groups || {})
		// Optional/alternated groups that did not participate in the match have
		// `undefined` indices; destructuring them would throw, so drop them first.
		.filter(([, indices]) => indices)
		.map(([type, [start, end]]) => ({
			type,
			// Group offsets are absolute in the haystack; rebase them onto the match.
			start: start - match.index,
			end: end - match.index,
		}))
		.sort((a, b) => a.start - b.start);

	for (let { type, start, end } of entries) {
		if (start > i) {
			// Plain text between (or before) named groups stays an untyped string.
			result.push(str.slice(i, start));
		}

		const content = str.slice(start, end);
		type = camelToKebabCase(type);
		result.push({ type, content });
		i = end;
	}

	// Trailing text after the last named group.
	if (i < str.length) {
		result.push(str.slice(i));
	}

	return result;
}

/**
* @import { Prism } from '../prism.js';
* @import { Grammar, LanguageRegistry } from '../../types.d.ts';
Expand Down
79 changes: 15 additions & 64 deletions src/languages/markdown.js
Original file line number Diff line number Diff line change
Expand Up @@ -99,73 +99,24 @@ export default {
// ```optional language
// code block
// ```
pattern: /^```[\s\S]*?^```$/m,
greedy: true,
inside: /** @type {Grammar} */ ({
'code-block': {
pattern: /^(```.*(?:\n|\r\n?))[\s\S]+?(?=(?:\n|\r\n?)^```$)/m,
lookbehind: true,
},
'code-language': {
pattern: /^(```).+/,
lookbehind: true,
},
'punctuation': /```/,
/** @type {Grammar['$tokenize']} */
$tokenize (code, grammar, Prism) {
const tokens = Prism.tokenize(code, withoutTokenize(grammar));

/*
* Add the correct `language-xxxx` class to this code block. Keep in mind that the `code-language` token
* is optional. But the grammar is defined so that there is only one case we have to handle:
*
* token.content = [
* <span class="punctuation">```</span>,
* <span class="code-language">xxxx</span>,
* '\n', // exactly one new lines (\r or \n or \r\n)
* <span class="code-block">...</span>,
* '\n', // exactly one new lines again
* <span class="punctuation">```</span>
* ];
*/

const codeLang = tokens[1];
const codeBlock = tokens[3];

if (
typeof codeLang === 'object' &&
typeof codeBlock === 'object' &&
codeLang.type === 'code-language' &&
codeBlock.type === 'code-block'
) {
// this might be a language that Prism does not support

// do some replacements to support C++, C#, and F#
const lang = getTextContent(codeLang.content)
.replace(/\b#/g, 'sharp')
.replace(/\b\+\+/g, 'pp');
// only use the first word
const langName = /[a-z][\w-]*/i.exec(lang)?.[0].toLowerCase();
if (langName) {
codeBlock.addAlias('language-' + langName);

const grammar =
Prism.languageRegistry.getLanguage(lang)?.resolvedGrammar;
if (grammar) {
codeBlock.content = Prism.tokenize(
getTextContent(codeBlock),
grammar
);
}
else {
codeBlock.addAlias('needs-highlighting');
}
pattern:
/^```\s*(?<codeLanguage>\{[^{}]*\}|[a-z+#-]+)(?:[ \t][^\n\r]*)?(?:\n|\r\n?)(?<codeBlock>[\s\S]*?)(?:\n|\r\n?)```$/im,
inside: {
'code-block': groups => {
let lang = groups.codeLanguage;
// Extract language code from curly braces like {r pressure, echo=FALSE} → r
if (lang.startsWith('{') && lang.endsWith('}')) {
const match = lang.slice(1, -1).match(/^\s*([a-z+#-]+)/i);
if (match) {
lang = match[0];
}
}

return tokens;
// Apply transformations: c++ → cpp, c# → csharp, f# → fsharp, etc.
lang = lang.replace(/\b#/g, 'sharp').replace(/\b\+\+/g, 'pp');
return lang.toLowerCase();
},
}),
'punctuation': /```/,
},
},
],
'title': [
Expand Down
10 changes: 5 additions & 5 deletions src/shared/languages/templating.js
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,9 @@ export function templating (code, hostGrammar, templateGrammar, Prism) {
hostGrammar = resolve.call(Prism, hostGrammar);
templateGrammar = resolve.call(Prism, templateGrammar);

const { hostCode, tokenStack } = buildPlaceholders(code, templateGrammar, Prism);
const { hostCode, tokenStack } = buildPlaceholders(code, /** @type {Grammar | undefined} */ (templateGrammar), Prism);

const tokens = hostGrammar ? Prism.tokenize(hostCode, hostGrammar) : [hostCode];
const tokens = hostGrammar ? Prism.tokenize(hostCode, /** @type {Grammar} */ (hostGrammar)) : [hostCode];
insertIntoHostToken(tokens, tokenStack);
return tokens;
}
Expand All @@ -145,10 +145,10 @@ export function embeddedIn (hostGrammar) {
}

/**
* @import { Prism, Token } from '../../core.js';
* @import { TokenStream, TokenStack, Grammar, LanguageRegistry} from '../../types.d.ts';
* @import { Prism } from '../../core.js';
* @import { TokenStream, TokenStack, Grammar } from '../../types.d.ts';
*/

/**
* @typedef {Grammar | string | undefined | null} GrammarRef
* @typedef {Grammar | Function | string | undefined | null} GrammarRef
*/
10 changes: 10 additions & 0 deletions src/shared/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,13 @@ export function kebabToCamelCase (kebab) {
const [first, ...others] = kebab.split(/-/);
return first + others.map(capitalize).join('');
}

/**
 * Converts the given camel case identifier to a kebab case identifier.
 *
 * @param {string} str - camel case identifier (coerced to a string first)
 * @returns {string} the kebab case form, e.g. `fooBar` becomes `foo-bar`
 */
export function camelToKebabCase (str) {
	let kebab = '';
	for (const char of (str + '')) {
		// Only ASCII uppercase letters mark a word boundary.
		kebab += /[A-Z]/.test(char) ? '-' + char.toLowerCase() : char;
	}
	return kebab;
}
Loading
Loading