Skip to content

Commit f7be2a6

Browse files
fix(website): updated askai markdown documentation (#2788)
* fix(website): updated askai markdown documentation
* fix(website): updated askai markdown documentation
1 parent 5e9bdfe commit f7be2a6

File tree

1 file changed

+91
-33
lines changed

1 file changed

+91
-33
lines changed

packages/website/docs/v4/askai-markdown-indexing.mdx

Lines changed: 91 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -82,19 +82,28 @@ For users who need advanced customization or want to understand the underlying c
8282
indexName: "my-markdown-index",
8383
pathsToMatch: ["https://example.com/docs/**"],
8484
recordExtractor: ({ $, url, helpers }) => {
85-
const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.)
85+
// Target only the main content, excluding navigation
86+
const text = helpers.markdown(
87+
"main > *:not(nav):not(header):not(.breadcrumb)",
88+
);
89+
8690
if (text === "") return [];
8791

88-
// Extract language or other attributes as needed. Optional
8992
const language = $("html").attr("lang") || "en";
9093

94+
const title = $("head > title").text();
95+
96+
// Get the main heading for better searchability
97+
const h1 = $("main h1").first().text();
98+
9199
return helpers.splitTextIntoRecords({
92100
text,
93101
baseRecord: {
94102
url,
95103
objectID: url,
96-
title: $("head > title").text(),
97-
lang: language, // Add more attributes as needed
104+
title: title || h1,
105+
heading: h1, // Add main heading as separate field
106+
lang: language,
98107
},
99108
maxRecordBytes: 100000, // Higher = fewer, larger records. Lower = more, smaller records.
100109
// Note: Increasing this value may increase the token count for LLMs, which can affect context size and cost.
@@ -110,12 +119,15 @@ For users who need advanced customization or want to understand the underlying c
110119
```js
111120
// initialIndexSettings: { ...,
112121
"my-markdown-index": {
113-
attributesForFaceting: ["lang"], // Add more if you extract more attributes
122+
attributesForFaceting: ["lang"],
114123
ignorePlurals: true,
115-
minProximity: 4,
124+
minProximity: 1,
116125
removeStopWords: false,
117-
searchableAttributes: ["unordered(title)", "unordered(text)"],
118-
removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback.
126+
searchableAttributes: ["title", "heading", "unordered(text)"],
127+
removeWordsIfNoResults: "lastWords",
128+
attributesToHighlight: ["title", "text"],
129+
typoTolerance: false,
130+
advancedSyntax: false,
119131
},
120132
// ...},
121133
```
@@ -397,20 +409,28 @@ import TabItem from '@theme/TabItem';
397409
indexName: "my-markdown-index",
398410
pathsToMatch: ["https://example.com/**"],
399411
recordExtractor: ({ $, url, helpers }) => {
400-
const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.)
412+
// Target only the main content, excluding navigation
413+
const text = helpers.markdown(
414+
"main > *:not(nav):not(header):not(.breadcrumb)",
415+
);
416+
401417
if (text === "") return [];
402418

403-
// Customize selectors or meta extraction as needed. Optional
404419
const language = $("html").attr("lang") || "en";
405420

421+
const title = $("head > title").text();
422+
423+
// Get the main heading for better searchability
424+
const h1 = $("main h1").first().text();
425+
406426
return helpers.splitTextIntoRecords({
407427
text,
408428
baseRecord: {
409429
url,
410430
objectID: url,
411-
title: $("head > title").text(),
412-
// Add more optional attributes to the record
413-
lang: language
431+
title: title || h1,
432+
heading: h1, // Add main heading as separate field
433+
lang: language,
414434
},
415435
maxRecordBytes: 100000, // Higher = fewer, larger records. Lower = more, smaller records.
416436
// Note: Increasing this value may increase the token count for LLMs, which can affect context size and cost.
@@ -424,10 +444,13 @@ import TabItem from '@theme/TabItem';
424444
"my-markdown-index": {
425445
attributesForFaceting: ["lang"], // Recommended if you add more attributes outside of objectID
426446
ignorePlurals: true,
427-
minProximity: 4,
447+
minProximity: 1,
428448
removeStopWords: false,
429-
searchableAttributes: ["unordered(title)", "unordered(text)"],
430-
removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback.
449+
searchableAttributes: ["title", "heading", "unordered(text)"],
450+
removeWordsIfNoResults: "lastWords",
451+
attributesToHighlight: ["title", "text"],
452+
typoTolerance: false,
453+
advancedSyntax: false,
431454
},
432455
// ...},
433456
```
@@ -446,7 +469,11 @@ import TabItem from '@theme/TabItem';
446469
indexName: "my-markdown-index",
447470
pathsToMatch: ["https://example.com/docs/**"],
448471
recordExtractor: ({ $, url, helpers }) => {
449-
const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.)
472+
// Target only the main content, excluding navigation
473+
const text = helpers.markdown(
474+
"main > *:not(nav):not(header):not(.breadcrumb)",
475+
);
476+
450477
if (text === "") return [];
451478

452479
// Extract meta tag values. These are required for Docusaurus
@@ -457,12 +484,18 @@ import TabItem from '@theme/TabItem';
457484
const docusaurus_tag =
458485
$('meta[name="docsearch:docusaurus_tag"]').attr("content") || "";
459486

487+
const title = $("head > title").text();
488+
489+
// Get the main heading for better searchability
490+
const h1 = $("main h1").first().text();
491+
460492
return helpers.splitTextIntoRecords({
461493
text,
462494
baseRecord: {
463495
url,
464496
objectID: url,
465-
title: $("head > title").text(),
497+
title: title || h1,
498+
heading: h1, // Add main heading as separate field
466499
lang: language, // Required for Docusaurus
467500
language, // Required for Docusaurus
468501
version: version.split(","), // in case there are multiple versions. Required for Docusaurus
@@ -483,10 +516,13 @@ import TabItem from '@theme/TabItem';
483516
"my-markdown-index": {
484517
attributesForFaceting: ["lang", "language", "version", "docusaurus_tag"], // Required for Docusaurus
485518
ignorePlurals: true,
486-
minProximity: 4,
519+
minProximity: 1,
487520
removeStopWords: false,
488-
searchableAttributes: ["unordered(title)", "unordered(text)"],
489-
removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback.
521+
searchableAttributes: ["title", "heading", "unordered(text)"],
522+
removeWordsIfNoResults: "lastWords",
523+
attributesToHighlight: ["title", "text"],
524+
typoTolerance: false,
525+
advancedSyntax: false,
490526
},
491527
// ...},
492528
```
@@ -505,19 +541,27 @@ import TabItem from '@theme/TabItem';
505541
indexName: "my-markdown-index",
506542
pathsToMatch: ["https://example.com/docs/**"],
507543
recordExtractor: ({ $, url, helpers }) => {
508-
const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.)
544+
// Target only the main content, excluding navigation
545+
const text = helpers.markdown(
546+
"main > *:not(nav):not(header):not(.breadcrumb)",
547+
);
548+
509549
if (text === "") return [];
510550

511-
// Extract meta tag values. These are required for VitePress
512551
const language = $("html").attr("lang") || "en";
513552

553+
const title = $("head > title").text();
554+
555+
// Get the main heading for better searchability
556+
const h1 = $("main h1").first().text();
514557

515558
return helpers.splitTextIntoRecords({
516559
text,
517560
baseRecord: {
518561
url,
519-
title: $("head > title").text(),
520562
objectID: url,
563+
title: title || h1,
564+
heading: h1, // Add main heading as separate field
521565
lang: language, // Required for VitePress
522566
},
523567
maxRecordBytes: 100000, // Higher = fewer, larger records. Lower = more, smaller records.
@@ -532,10 +576,13 @@ import TabItem from '@theme/TabItem';
532576
"my-markdown-index": {
533577
attributesForFaceting: ["lang"], // Required for VitePress
534578
ignorePlurals: true,
535-
minProximity: 4,
579+
minProximity: 1,
536580
removeStopWords: false,
537-
searchableAttributes: ["unordered(title)", "unordered(text)"],
538-
removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback.
581+
searchableAttributes: ["title", "heading", "unordered(text)"],
582+
removeWordsIfNoResults: "lastWords",
583+
attributesToHighlight: ["title", "text"],
584+
typoTolerance: false,
585+
advancedSyntax: false,
539586
},
540587
// ...},
541588
```
@@ -554,19 +601,27 @@ import TabItem from '@theme/TabItem';
554601
indexName: "my-markdown-index",
555602
pathsToMatch: ["https://example.com/docs/**"],
556603
recordExtractor: ({ $, url, helpers }) => {
557-
const text = helpers.markdown("main"); // Change "main" to match your content tag (e.g., "main", "article", etc.)
604+
// Target only the main content, excluding navigation
605+
const text = helpers.markdown(
606+
"main > *:not(nav):not(header):not(.breadcrumb)",
607+
);
608+
558609
if (text === "") return [];
559610

560-
// Extract meta tag values. These are required for Astro/StarLight
561611
const language = $("html").attr("lang") || "en";
562612

613+
const title = $("head > title").text();
614+
615+
// Get the main heading for better searchability
616+
const h1 = $("main h1").first().text();
563617

564618
return helpers.splitTextIntoRecords({
565619
text,
566620
baseRecord: {
567621
url,
568-
title: $("head > title").text(),
569622
objectID: url,
623+
title: title || h1,
624+
heading: h1, // Add main heading as separate field
570625
lang: language, // Required for Astro/StarLight
571626
},
572627
maxRecordBytes: 100000, // Higher = fewer, larger records. Lower = more, smaller records.
@@ -581,10 +636,13 @@ import TabItem from '@theme/TabItem';
581636
"my-markdown-index": {
582637
attributesForFaceting: ["lang"], // Required for Astro/StarLight
583638
ignorePlurals: true,
584-
minProximity: 4,
639+
minProximity: 1,
585640
removeStopWords: false,
586-
searchableAttributes: ["unordered(title)", "unordered(text)"],
587-
removeWordsIfNoResults: "allOptional" // This will help if the LLM finds no results. A graceful fallback.
641+
searchableAttributes: ["title", "heading", "unordered(text)"],
642+
removeWordsIfNoResults: "lastWords",
643+
attributesToHighlight: ["title", "text"],
644+
typoTolerance: false,
645+
advancedSyntax: false,
588646
},
589647
// ...},
590648
```

0 commit comments

Comments
 (0)