@@ -336,8 +336,18 @@ export function extract_essence(
336336 . map ( ( s ) => s . trim ( ) )
337337 . filter ( ( s ) => s . length > 10 ) ;
338338 if ( sents . length === 0 ) return raw . slice ( 0 , max_len ) ;
339- const score_sent = ( s : string ) : number => {
339+ const score_sent = ( s : string , idx : number ) : number => {
340340 let sc = 0 ;
341+ // First sentence bonus - titles/headers are essential for retrieval
342+ if ( idx === 0 ) sc += 10 ;
343+ // Second sentence often contains key context
344+ if ( idx === 1 ) sc += 5 ;
345+ // Header/section markers (markdown or label-style)
346+ if ( / ^ # + \s / . test ( s ) || / ^ [ A - Z ] [ A - Z \s ] + : / . test ( s ) ) sc += 8 ;
347+ // Colon-suffixed labels like "PROBLEM:", "SOLUTION:", "CONTEXT:"
348+ if ( / ^ [ A - Z ] [ a - z ] + : / i. test ( s ) ) sc += 6 ;
349+ // Date patterns (ISO format)
350+ if ( / \d { 4 } - \d { 2 } - \d { 2 } / . test ( s ) ) sc += 7 ;
341351 if (
342352 / \b ( j a n u a r y | f e b r u a r y | m a r c h | a p r i l | m a y | j u n e | j u l y | a u g u s t | s e p t e m b e r | o c t o b e r | n o v e m b e r | d e c e m b e r ) \s + \d + / i. test (
343353 s ,
@@ -347,7 +357,7 @@ export function extract_essence(
347357 if ( / \$ \d + | \d + \s * ( m i l e s | d o l l a r s | y e a r s | m o n t h s | k m ) / . test ( s ) ) sc += 4 ;
348358 if ( / \b [ A - Z ] [ a - z ] + (?: \s + [ A - Z ] [ a - z ] + ) + / . test ( s ) ) sc += 3 ;
349359 if (
350- / \b ( b o u g h t | p u r c h a s e d | s e r v i c e d | v i s i t e d | w e n t | g o t | r e c e i v e d | p a i d | e a r n e d | l e a r n e d | d i s c o v e r e d | f o u n d | s a w | m e t | c o m p l e t e d | f i n i s h e d ) \b / i. test (
360+ / \b ( b o u g h t | p u r c h a s e d | s e r v i c e d | v i s i t e d | w e n t | g o t | r e c e i v e d | p a i d | e a r n e d | l e a r n e d | d i s c o v e r e d | f o u n d | s a w | m e t | c o m p l e t e d | f i n i s h e d | f i x e d | i m p l e m e n t e d | c r e a t e d | u p d a t e d | a d d e d | r e m o v e d | r e s o l v e d ) \b / i. test (
351361 s ,
352362 )
353363 )
@@ -357,21 +367,42 @@ export function extract_essence(
357367 if ( / \b ( I | m y | m e ) \b / . test ( s ) ) sc += 1 ;
358368 return sc ;
359369 } ;
360- const scored = sents . map ( ( s ) => ( { text : s , score : score_sent ( s ) } ) ) ;
370+ const scored = sents . map ( ( s , idx ) => ( { text : s , score : score_sent ( s , idx ) , idx } ) ) ;
361371 scored . sort ( ( a , b ) => b . score - a . score ) ;
372+ // Build result; the first sentence is always kept when it fits within half of max_len
362373 let comp = "" ;
363- for ( const item of scored ) {
364- const cand = comp ? `${ comp } . ${ item . text } ` : item . text ;
365- if ( cand . length <= max_len ) {
366- comp = cand ;
367- } else if ( comp . length < max_len * 0.7 ) {
368- const rem = max_len - comp . length - 2 ;
369- if ( rem > 20 ) {
370- comp += ". " + item . text . slice ( 0 , rem ) ;
374+ const firstSent = sents [ 0 ] ;
375+ if ( firstSent && firstSent . length <= max_len * 0.5 ) {
376+ comp = firstSent ;
377+ const remaining = scored . filter ( ( item ) => item . idx !== 0 ) ;
378+ for ( const item of remaining ) {
379+ const cand = comp ? `${ comp } . ${ item . text } ` : item . text ;
380+ if ( cand . length <= max_len ) {
381+ comp = cand ;
382+ } else if ( comp . length < max_len * 0.7 ) {
383+ const rem = max_len - comp . length - 2 ;
384+ if ( rem > 20 ) {
385+ comp += ". " + item . text . slice ( 0 , rem ) ;
386+ }
387+ break ;
388+ } else {
389+ break ;
390+ }
391+ }
392+ } else {
393+ for ( const item of scored ) {
394+ const cand = comp ? `${ comp } . ${ item . text } ` : item . text ;
395+ if ( cand . length <= max_len ) {
396+ comp = cand ;
397+ } else if ( comp . length < max_len * 0.7 ) {
398+ const rem = max_len - comp . length - 2 ;
399+ if ( rem > 20 ) {
400+ comp += ". " + item . text . slice ( 0 , rem ) ;
401+ }
402+ break ;
403+ } else {
404+ break ;
371405 }
372- break ;
373- } else {
374- break ;
375406 }
376407 }
377408 return comp || raw . slice ( 0 , max_len ) ;
0 commit comments