@@ -82,19 +82,28 @@ For users who need advanced customization or want to understand the underlying c
8282 indexName: " my-markdown-index" ,
8383 pathsToMatch: [" https://example.com/docs/**" ],
8484 recordExtractor : ({ $, url, helpers }) => {
85- const text = helpers .markdown (" main" ); // Change "main" to match your content tag (e.g., "main", "article", etc.)
85+ // Target only the main content, excluding navigation
86+ const text = helpers .markdown (
87+ " main > *:not(nav):not(header):not(.breadcrumb)" ,
88+ );
89+
8690 if (text === "") return [];
8791
88- // Extract language or other attributes as needed. Optional
8992 const language = $ (" html" ).attr (" lang" ) || " en" ;
9093
94+ const title = $ (" head > title" ).text ();
95+
96+ // Get the main heading for better searchability
97+ const h1 = $ (" main h1" ).first ().text ();
98+
9199 return helpers .splitTextIntoRecords ({
92100 text,
93101 baseRecord: {
94102 url,
95103 objectID: url,
96- title: $ (" head > title" ).text (),
97- lang: language, // Add more attributes as needed
104+ title: title || h1,
105+ heading: h1, // Add main heading as separate field
106+ lang: language,
98107 },
99108 maxRecordBytes: 100000 , // Higher = fewer, larger records. Lower = more, smaller records.
100109 // Note: Increasing this value may increase the token count for LLMs, which can affect context size and cost.
@@ -110,12 +119,15 @@ For users who need advanced customization or want to understand the underlying c
110119``` js
111120// initialIndexSettings: { ...,
112121" my-markdown-index" : {
113- attributesForFaceting: [" lang" ], // Add more if you extract more attributes
122+ attributesForFaceting: [" lang" ],
114123 ignorePlurals: true ,
115- minProximity: 4 ,
124+ minProximity: 1 ,
116125 removeStopWords: false ,
117- searchableAttributes: [" unordered(title)" , " unordered(text)" ],
118- removeWordsIfNoResults: " allOptional" // This will help if the LLM finds no results. A graceful fallback.
126+ searchableAttributes: [" title" , " heading" , " unordered(text)" ],
127+ removeWordsIfNoResults: " lastWords" ,
128+ attributesToHighlight: [" title" , " text" ],
129+ typoTolerance: false ,
130+ advancedSyntax: false ,
119131},
120132// ...},
121133```
@@ -397,20 +409,28 @@ import TabItem from '@theme/TabItem';
397409 indexName: " my-markdown-index" ,
398410 pathsToMatch: [" https://example.com/**" ],
399411 recordExtractor : ({ $, url, helpers }) => {
400- const text = helpers .markdown (" main" ); // Change "main" to match your content tag (e.g., "main", "article", etc.)
412+ // Target only the main content, excluding navigation
413+ const text = helpers .markdown (
414+ " main > *:not(nav):not(header):not(.breadcrumb)" ,
415+ );
416+
401417 if (text === "") return [];
402418
403- // Customize selectors or meta extraction as needed. Optional
404419 const language = $ (" html" ).attr (" lang" ) || " en" ;
405420
421+ const title = $ (" head > title" ).text ();
422+
423+ // Get the main heading for better searchability
424+ const h1 = $ (" main h1" ).first ().text ();
425+
406426 return helpers .splitTextIntoRecords ({
407427 text,
408428 baseRecord: {
409429 url,
410430 objectID: url,
411- title: $("head > title").text(),
412- // Add more optional attributes to the record
413- lang: language
431+ title: title || h1,
432+ heading: h1, // Add main heading as separate field
433+ lang: language,
414434 },
415435 maxRecordBytes: 100000 , // Higher = fewer, larger records. Lower = more, smaller records.
416436 // Note: Increasing this value may increase the token count for LLMs, which can affect context size and cost.
@@ -424,10 +444,13 @@ import TabItem from '@theme/TabItem';
424444" my-markdown-index" : {
425445 attributesForFaceting: [" lang" ], // Recommended if you add more attributes outside of objectID
426446 ignorePlurals: true ,
427- minProximity: 4 ,
447+ minProximity: 1 ,
428448 removeStopWords: false ,
429- searchableAttributes: [" unordered(title)" , " unordered(text)" ],
430- removeWordsIfNoResults: " allOptional" // This will help if the LLM finds no results. A graceful fallback.
449+ searchableAttributes: [" title" , " heading" , " unordered(text)" ],
450+ removeWordsIfNoResults: " lastWords" ,
451+ attributesToHighlight: [" title" , " text" ],
452+ typoTolerance: false ,
453+ advancedSyntax: false ,
431454},
432455// ...},
433456```
@@ -446,7 +469,11 @@ import TabItem from '@theme/TabItem';
446469 indexName: " my-markdown-index" ,
447470 pathsToMatch: [" https://example.com/docs/**" ],
448471 recordExtractor : ({ $, url, helpers }) => {
449- const text = helpers .markdown (" main" ); // Change "main" to match your content tag (e.g., "main", "article", etc.)
472+ // Target only the main content, excluding navigation
473+ const text = helpers .markdown (
474+ " main > *:not(nav):not(header):not(.breadcrumb)" ,
475+ );
476+
450477 if (text === "") return [];
451478
452479 // Extract meta tag values. These are required for Docusaurus
@@ -457,12 +484,18 @@ import TabItem from '@theme/TabItem';
457484 const docusaurus_tag =
458485 $('meta[name="docsearch:docusaurus_tag"]').attr("content") || "";
459486
487+ const title = $ (" head > title" ).text ();
488+
489+ // Get the main heading for better searchability
490+ const h1 = $ (" main h1" ).first ().text ();
491+
460492 return helpers .splitTextIntoRecords ({
461493 text,
462494 baseRecord: {
463495 url,
464496 objectID: url,
465- title: $ (" head > title" ).text (),
497+ title: title || h1,
498+ heading: h1, // Add main heading as separate field
466499 lang: language, // Required for Docusaurus
467500 language, // Required for Docusaurus
468501 version: version .split (" ," ), // in case there are multiple versions. Required for Docusaurus
@@ -483,10 +516,13 @@ import TabItem from '@theme/TabItem';
483516" my-markdown-index" : {
484517 attributesForFaceting: [" lang" , " language" , " version" , " docusaurus_tag" ], // Required for Docusaurus
485518 ignorePlurals: true ,
486- minProximity: 4 ,
519+ minProximity: 1 ,
487520 removeStopWords: false ,
488- searchableAttributes: [" unordered(title)" , " unordered(text)" ],
489- removeWordsIfNoResults: " allOptional" // This will help if the LLM finds no results. A graceful fallback.
521+ searchableAttributes: [" title" , " heading" , " unordered(text)" ],
522+ removeWordsIfNoResults: " lastWords" ,
523+ attributesToHighlight: [" title" , " text" ],
524+ typoTolerance: false ,
525+ advancedSyntax: false ,
490526},
491527// ...},
492528```
@@ -505,19 +541,27 @@ import TabItem from '@theme/TabItem';
505541 indexName: " my-markdown-index" ,
506542 pathsToMatch: [" https://example.com/docs/**" ],
507543 recordExtractor : ({ $, url, helpers }) => {
508- const text = helpers .markdown (" main" ); // Change "main" to match your content tag (e.g., "main", "article", etc.)
544+ // Target only the main content, excluding navigation
545+ const text = helpers .markdown (
546+ " main > *:not(nav):not(header):not(.breadcrumb)" ,
547+ );
548+
509549 if (text === "") return [];
510550
511- // Extract meta tag values. These are required for VitePress
512551 const language = $ (" html" ).attr (" lang" ) || " en" ;
513552
553+ const title = $ (" head > title" ).text ();
554+
555+ // Get the main heading for better searchability
556+ const h1 = $ (" main h1" ).first ().text ();
514557
515558 return helpers .splitTextIntoRecords ({
516559 text,
517560 baseRecord: {
518561 url,
519- title: $ (" head > title" ).text (),
520562 objectID: url,
563+ title: title || h1,
564+ heading: h1, // Add main heading as separate field
521565 lang: language, // Required for VitePress
522566 },
523567 maxRecordBytes: 100000 , // Higher = fewer, larger records. Lower = more, smaller records.
@@ -532,10 +576,13 @@ import TabItem from '@theme/TabItem';
532576" my-markdown-index" : {
533577 attributesForFaceting: [" lang" ], // Required for VitePress
534578 ignorePlurals: true ,
535- minProximity: 4 ,
579+ minProximity: 1 ,
536580 removeStopWords: false ,
537- searchableAttributes: [" unordered(title)" , " unordered(text)" ],
538- removeWordsIfNoResults: " allOptional" // This will help if the LLM finds no results. A graceful fallback.
581+ searchableAttributes: [" title" , " heading" , " unordered(text)" ],
582+ removeWordsIfNoResults: " lastWords" ,
583+ attributesToHighlight: [" title" , " text" ],
584+ typoTolerance: false ,
585+ advancedSyntax: false ,
539586},
540587// ...},
541588```
@@ -554,19 +601,27 @@ import TabItem from '@theme/TabItem';
554601 indexName: " my-markdown-index" ,
555602 pathsToMatch: [" https://example.com/docs/**" ],
556603 recordExtractor : ({ $, url, helpers }) => {
557- const text = helpers .markdown (" main" ); // Change "main" to match your content tag (e.g., "main", "article", etc.)
604+ // Target only the main content, excluding navigation
605+ const text = helpers .markdown (
606+ " main > *:not(nav):not(header):not(.breadcrumb)" ,
607+ );
608+
558609 if (text === "") return [];
559610
560- // Extract meta tag values. These are required for Astro/StarLight
561611 const language = $ (" html" ).attr (" lang" ) || " en" ;
562612
613+ const title = $ (" head > title" ).text ();
614+
615+ // Get the main heading for better searchability
616+ const h1 = $ (" main h1" ).first ().text ();
563617
564618 return helpers .splitTextIntoRecords ({
565619 text,
566620 baseRecord: {
567621 url,
568- title: $ (" head > title" ).text (),
569622 objectID: url,
623+ title: title || h1,
624+ heading: h1, // Add main heading as separate field
570625 lang: language, // Required for Astro/StarLight
571626 },
572627 maxRecordBytes: 100000 , // Higher = fewer, larger records. Lower = more, smaller records.
@@ -581,10 +636,13 @@ import TabItem from '@theme/TabItem';
581636" my-markdown-index" : {
582637 attributesForFaceting: [" lang" ], // Required for Astro/StarLight
583638 ignorePlurals: true ,
584- minProximity: 4 ,
639+ minProximity: 1 ,
585640 removeStopWords: false ,
586- searchableAttributes: [" unordered(title)" , " unordered(text)" ],
587- removeWordsIfNoResults: " allOptional" // This will help if the LLM finds no results. A graceful fallback.
641+ searchableAttributes: [" title" , " heading" , " unordered(text)" ],
642+ removeWordsIfNoResults: " lastWords" ,
643+ attributesToHighlight: [" title" , " text" ],
644+ typoTolerance: false ,
645+ advancedSyntax: false ,
588646},
589647// ...},
590648```
0 commit comments