Skip to content

Commit 9651f39

Browse files
committed
fix: additional parsing cases
1 parent cf16b10 commit 9651f39

3 files changed

Lines changed: 164 additions & 3 deletions

File tree

src/compute-engine/latex-syntax/parse.ts

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1578,6 +1578,34 @@ export class _Parser implements Parser {
15781578

15791579
this.skipSpace();
15801580

1581+
// Check for optional subscript: log_2(x) or log_{10}(x)
1582+
let subscript: MathJsonExpression | null = null;
1583+
if (this.peek === '_') {
1584+
const underIndex = this.index;
1585+
this.index++; // skip '_'
1586+
subscript = this.parseGroup();
1587+
if (subscript === null) {
1588+
// Try a bare subscript: a single letter (_b) or a run of digits (_2, _10)
1589+
const subStart = this.index;
1590+
if (!this.atEnd && /^[a-zA-Z]$/.test(this.peek)) {
1591+
subscript = this.peek;
1592+
this.index++;
1593+
} else {
1594+
let digits = '';
1595+
while (!this.atEnd && /^[0-9]$/.test(this.peek)) {
1596+
digits += this.peek;
1597+
this.index++;
1598+
}
1599+
if (digits) subscript = parseInt(digits);
1600+
}
1601+
if (subscript === null) {
1602+
this.index = start;
1603+
return null;
1604+
}
1605+
}
1606+
this.skipSpace();
1607+
}
1608+
15811609
// Check for optional exponent: sin^2(x) or sin^{10}(x)
15821610
let exponent: MathJsonExpression | null = null;
15831611
if (this.peek === '^') {
@@ -1699,7 +1727,17 @@ export class _Parser implements Parser {
16991727
return exponent !== null ? ['Power', result, exponent] : result;
17001728
}
17011729

1702-
const result: MathJsonExpression = [fnName, ...args];
1730+
// Special case: log with subscript base (matches \log_b behavior)
1731+
// log_2(x) -> ['Lb', x], log_10(x) -> ['Log', x], log_b(x) -> ['Log', x, b]
1732+
let result: MathJsonExpression;
1733+
if (name === 'log' && subscript !== null) {
1734+
if (subscript === 2) result = ['Lb', ...args];
1735+
else if (subscript === 10) result = ['Log', ...args];
1736+
else result = ['Log', args[0], subscript];
1737+
} else {
1738+
result = [fnName, ...args];
1739+
}
1740+
17031741
return exponent !== null ? ['Power', result, exponent] : result;
17041742
}
17051743

src/compute-engine/latex-syntax/tokenizer.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ const UNICODE_SUPERSCRIPT_MAP: Record<string, string> = {
1919
'\u2078': '8', // ⁸
2020
'\u2079': '9', // ⁹
2121
'\u207B': '-', // ⁻
22+
'\u2071': 'i', // ⁱ
23+
'\u207F': 'n', // ⁿ
2224
};
2325

2426
const UNICODE_SUBSCRIPT_MAP: Record<string, string> = {
@@ -72,8 +74,8 @@ class Tokenizer {
7274
s = s.replace(/\u2212/g, '-');
7375

7476
// Replace Unicode superscript sequences with ^{...}
75-
// Handles: ⁰¹²³⁴⁵⁶⁷⁸⁹ and ⁻ (superscript minus)
76-
s = s.replace(/[¹²³]+/g, (m) => {
77+
// Handles: ⁰¹²³⁴⁵⁶⁷⁸⁹⁻ⁱⁿ
78+
s = s.replace(/[¹²³]+/g, (m) => {
7779
const digits = Array.from(m)
7880
.map((c) => UNICODE_SUPERSCRIPT_MAP[c])
7981
.join('');

test/compute-engine/latex-syntax/parsing.test.ts

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,4 +454,125 @@ describe('NON-STRICT MODE (Math-ASCII/Typst-like syntax)', () => {
454454
`);
455455
});
456456
});
457+
458+
describe('Bare function exponents', () => {
459+
test('sin^2(x)', () => {
460+
expect(ce.parse('sin^2(x)', { strict: false })).toMatchInlineSnapshot(
461+
`["Square", ["Sin", "x"]]`
462+
);
463+
});
464+
465+
test('cos^{10}(x)', () => {
466+
expect(
467+
ce.parse('cos^{10}(x)', { strict: false })
468+
).toMatchInlineSnapshot(`["Power", ["Cos", "x"], 10]`);
469+
});
470+
471+
test('tan^-1(x)', () => {
472+
expect(ce.parse('tan^-1(x)', { strict: false })).toMatchInlineSnapshot(
473+
`["Divide", 1, ["Tan", "x"]]`
474+
);
475+
});
476+
477+
test('sin^2(x) + cos^2(x) identity', () => {
478+
const a = ce.parse('sin^2(x) + cos^2(x)', { strict: false });
479+
expect(a.isEqual(1)).toBe(true);
480+
});
481+
});
482+
483+
describe('Bare log with subscript', () => {
484+
test('log_2(x) → base 2', () => {
485+
expect(ce.parse('log_2(x)', { strict: false })).toMatchInlineSnapshot(
486+
`["Log", "x", 2]`
487+
);
488+
});
489+
490+
test('log_{10}(x) → base 10 (default)', () => {
491+
expect(
492+
ce.parse('log_{10}(x)', { strict: false })
493+
).toMatchInlineSnapshot(`["Log", "x"]`);
494+
});
495+
496+
test('log_3(x) → base 3', () => {
497+
expect(ce.parse('log_3(x)', { strict: false })).toMatchInlineSnapshot(
498+
`["Log", "x", 3]`
499+
);
500+
});
501+
502+
test('log_b(x) → variable base', () => {
503+
expect(ce.parse('log_b(x)', { strict: false })).toMatchInlineSnapshot(
504+
`["Log", "x", "b"]`
505+
);
506+
});
507+
});
508+
509+
describe('Unicode superscripts', () => {
510+
test('x² → Power', () => {
511+
expect(ce.parse('x²')).toMatchInlineSnapshot(`["Square", "x"]`);
512+
});
513+
514+
test('x²³ → multi-digit exponent', () => {
515+
expect(ce.parse('x²³')).toMatchInlineSnapshot(
516+
`["Power", "x", 23]`
517+
);
518+
});
519+
520+
test('x⁻² → negative exponent', () => {
521+
expect(ce.parse('x⁻²')).toMatchInlineSnapshot(
522+
`["Divide", 1, ["Square", "x"]]`
523+
);
524+
});
525+
526+
test('xⁿ → letter superscript', () => {
527+
expect(ce.parse('xⁿ')).toMatchInlineSnapshot(
528+
`["Power", "x", "n"]`
529+
);
530+
});
531+
532+
test('2ⁿ → numeric base with letter exponent', () => {
533+
expect(ce.parse('2ⁿ')).toMatchInlineSnapshot(
534+
`["Power", 2, "n"]`
535+
);
536+
});
537+
538+
test('\\sin²(x) → trig with Unicode exponent', () => {
539+
expect(ce.parse('\\sin²(x)')).toMatchInlineSnapshot(
540+
`["Square", ["Sin", "x"]]`
541+
);
542+
});
543+
544+
test('sin²(x) bare + Unicode', () => {
545+
expect(ce.parse('sin²(x)', { strict: false })).toMatchInlineSnapshot(
546+
`["Square", ["Sin", "x"]]`
547+
);
548+
});
549+
});
550+
551+
describe('Unicode subscripts', () => {
552+
test('x₁ → subscript', () => {
553+
expect(ce.parse('x₁')).toMatchInlineSnapshot(`x_1`);
554+
});
555+
556+
test('x₁₂ → multi-digit subscript', () => {
557+
expect(ce.parse('x₁₂')).toMatchInlineSnapshot(`x_12`);
558+
});
559+
560+
test('x₁² → subscript + superscript', () => {
561+
expect(ce.parse('x₁²')).toMatchInlineSnapshot(
562+
`["Square", "x_1"]`
563+
);
564+
});
565+
566+
test('log₂(x) → Unicode subscript on bare log', () => {
567+
expect(ce.parse('log₂(x)', { strict: false })).toMatchInlineSnapshot(
568+
`["Log", "x", 2]`
569+
);
570+
});
571+
572+
test('log₁₀(x) → Unicode subscript base 10', () => {
573+
expect(
574+
ce.parse('log₁₀(x)', { strict: false })
575+
).toMatchInlineSnapshot(`["Log", "x"]`);
576+
});
577+
});
457578
});

0 commit comments

Comments
 (0)