Skip to content

Commit 56a70e8

Browse files
committed
internal/frontend: change id generation to use parsed markdown text
To produce heading ids that match between the goldmark version of the code and the rsc.io/markdown version of the code, use the markdown parser to parse the markdown and then extract the text from it. We do this because rsc.io/markdown doesn't provide the raw markdown for us to generate the ids with. This will change the ids that are generated for some headings.

For golang/go#61399

Change-Id: Id0f26b311b59e848ff1753e058d413ed3168926d
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/548255
LUCI-TryBot-Result: Go LUCI <[email protected]>
kokoro-CI: kokoro <[email protected]>
Reviewed-by: Jonathan Amsterdam <[email protected]>
1 parent 00698da commit 56a70e8

File tree

3 files changed

+55
-26
lines changed

3 files changed

+55
-26
lines changed

internal/frontend/goldmark.go

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ import (
1010
"bytes"
1111
"context"
1212
"fmt"
13-
"regexp"
1413
"strings"
1514

1615
"github.com/yuin/goldmark/ast"
@@ -22,6 +21,7 @@ import (
2221
"golang.org/x/pkgsite/internal"
2322
"golang.org/x/pkgsite/internal/log"
2423
"golang.org/x/pkgsite/internal/source"
24+
"rsc.io/markdown"
2525
)
2626

2727
// astTransformer is a default transformer of the goldmark tree. We pass in
@@ -185,20 +185,35 @@ func newIDs() parser.IDs {
185185
// unit page. Duplicated heading ids are given an incremental suffix. See
186186
// readme_test.go for examples.
187187
func (s *ids) Generate(value []byte, kind ast.NodeKind) []byte {
188-
// Matches strings like `<tag attr="value">Text</tag>` or `[![Text](file.svg)](link.html)`.
189-
r := regexp.MustCompile(`(<[^<>]+>|\[\!\[[^\]]+]\([^\)]+\)\]\([^\)]+\))`)
190-
str := r.ReplaceAllString(string(value), "")
188+
var defaultID string
189+
if kind == ast.KindHeading {
190+
defaultID = "heading"
191+
} else {
192+
defaultID = "id"
193+
}
194+
195+
parser := &markdown.Parser{}
196+
doc := parser.Parse("# " + string(value))
197+
return []byte(s.generateID(doc, defaultID))
198+
}
199+
200+
func (s *ids) generateID(block markdown.Block, defaultID string) string {
201+
var buf bytes.Buffer
202+
walkBlocks([]markdown.Block{block}, func(b markdown.Block) error {
203+
if t, ok := b.(*markdown.Text); ok {
204+
for _, inl := range t.Inline {
205+
inl.PrintText(&buf)
206+
}
207+
}
208+
return nil
209+
})
191210
f := func(c rune) bool {
192211
return !('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z') && !('0' <= c && c <= '9')
193212
}
194-
str = strings.Join(strings.FieldsFunc(str, f), "-")
213+
str := strings.Join(strings.FieldsFunc(buf.String(), f), "-")
195214
str = strings.ToLower(str)
196215
if len(str) == 0 {
197-
if kind == ast.KindHeading {
198-
str = "heading"
199-
} else {
200-
str = "id"
201-
}
216+
str = defaultID
202217
}
203218
key := str
204219
for i := 1; ; i++ {
@@ -208,7 +223,7 @@ func (s *ids) Generate(value []byte, kind ast.NodeKind) []byte {
208223
}
209224
key = fmt.Sprintf("%s-%d", str, i)
210225
}
211-
return []byte("readme-" + key)
226+
return "readme-" + key
212227
}
213228

214229
// Put implements Put from the goldmark parser IDs interface.

internal/frontend/markdown.go

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import (
1212
"strings"
1313

1414
"github.com/google/safehtml/template"
15-
"github.com/yuin/goldmark/ast"
1615
"golang.org/x/pkgsite/internal"
1716
"golang.org/x/pkgsite/internal/derrors"
1817
"golang.org/x/pkgsite/internal/log"
@@ -118,6 +117,8 @@ func walkBlocks(blocks []markdown.Block, walkFunc func(b markdown.Block) error)
118117

119118
err = nil
120119
switch x := b.(type) {
120+
case *markdown.Document:
121+
err = walkBlocks(x.Blocks, walkFunc)
121122
case *markdown.Text:
122123
case *markdown.Paragraph:
123124
err = walkBlocks([]markdown.Block{x.Text}, walkFunc)
@@ -130,7 +131,9 @@ func walkBlocks(blocks []markdown.Block, walkFunc func(b markdown.Block) error)
130131
case *markdown.Quote:
131132
err = walkBlocks(x.Blocks, walkFunc)
132133
case *markdown.HTMLBlock:
133-
continue
134+
case *markdown.CodeBlock:
135+
case *markdown.Empty:
136+
case *markdown.ThematicBreak:
134137
default:
135138
return fmt.Errorf("unhandled block type %T", x)
136139
}
@@ -287,6 +290,12 @@ func transformHeadingsToHTML(doc *markdown.Document) {
287290
rewriteHeadingsBlocks = func(blocks []markdown.Block) {
288291
for i, b := range blocks {
289292
switch x := b.(type) {
293+
case *markdown.Paragraph:
294+
rewriteHeadingsBlocks([]markdown.Block{x.Text})
295+
case *markdown.List:
296+
rewriteHeadingsBlocks(x.Items)
297+
case *markdown.Item:
298+
rewriteHeadingsBlocks(x.Blocks)
290299
case *markdown.Quote:
291300
rewriteHeadingsBlocks(x.Blocks)
292301
case *markdown.Heading:
@@ -338,19 +347,12 @@ var htmlQuoteEscaper = strings.NewReplacer(
338347
// function, but we don't have the raw markdown anymore, so we use the
339348
// text instead.
340349
func rewriteHeadingIDs(doc *markdown.Document) {
341-
ids := newIDs()
350+
ids := &ids{
351+
values: map[string]bool{},
352+
}
342353
walkBlocks(doc.Blocks, func(b markdown.Block) error {
343354
if heading, ok := b.(*markdown.Heading); ok {
344-
var buf bytes.Buffer
345-
for _, inl := range heading.Text.Inline {
346-
// Hack: use HTML because ids strips out html tags.
347-
// TODO(matloob): change the goldmark code to not use
348-
// raw markdown text and instead depend on the text of the
349-
// nodes.
350-
inl.PrintHTML(&buf)
351-
}
352-
353-
id := ids.Generate(buf.Bytes(), ast.KindHeading)
355+
id := ids.generateID(heading, "heading")
354356
heading.ID = string(id)
355357
}
356358
return nil

internal/frontend/readme_test.go

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -414,9 +414,9 @@ func TestReadme(t *testing.T) {
414414
Contents: `# [![Image Text](file.svg)](link.html)
415415
`,
416416
},
417-
wantHTML: `<h3 class="h1" id="readme-heading"><a href="https://github.com/valid/module_name/blob/v1.0.0/link.html" rel="nofollow"><img src="https://github.com/valid/module_name/raw/v1.0.0/file.svg" alt="Image Text"/></a></h3>`,
417+
wantHTML: `<h3 class="h1" id="readme-image-text"><a href="https://github.com/valid/module_name/blob/v1.0.0/link.html" rel="nofollow"><img src="https://github.com/valid/module_name/raw/v1.0.0/file.svg" alt="Image Text"/></a></h3>`,
418418
wantOutline: []*Heading{
419-
{Level: 1, Text: "Image Text", ID: "readme-heading"},
419+
{Level: 1, Text: "Image Text", ID: "readme-image-text"},
420420
},
421421
},
422422
{
@@ -457,6 +457,18 @@ func TestReadme(t *testing.T) {
457457
{Level: 1, Text: "Heading", ID: "readme-heading-3"},
458458
},
459459
},
460+
{
461+
name: "tag in heading",
462+
unit: unit,
463+
readme: &internal.Readme{
464+
Filepath: "README.md",
465+
Contents: `# A link <a href="link">link</a>`,
466+
},
467+
wantHTML: `<h3 class="h1" id="readme-a-link-link">A link <a href="link" rel="nofollow">link</a></h3>`,
468+
wantOutline: []*Heading{
469+
{Level: 1, Text: "A link link", ID: "readme-a-link-link"},
470+
},
471+
},
460472
} {
461473
t.Run(test.name, func(t *testing.T) {
462474
processReadmes := map[string]func(ctx context.Context, u *internal.Unit) (frontendReadme *Readme, err error){

0 commit comments

Comments
 (0)