Skip to content

Commit ddd234a

Browse files
committed
internal/frontend: separate goldmark readme code from legacy overview code
The overview file contains mostly legacy code used to construct the overview page. This change separates the goldmark code in preparation for updates that will generate a TOC for the readme and the future removal of all overview related code. For golang/go#39297 Change-Id: Ifa5d0ee3983478fd25c6c59fc1bd2c45457cc05c Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/267117 Run-TryBot: Jamal Carvalho <[email protected]> TryBot-Result: kokoro <[email protected]> Reviewed-by: Jonathan Amsterdam <[email protected]> Trust: Jamal Carvalho <[email protected]>
1 parent 8ac0e15 commit ddd234a

File tree

5 files changed

+196
-138
lines changed

5 files changed

+196
-138
lines changed

internal/frontend/overview.go

Lines changed: 6 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,6 @@ import (
1919
"github.com/google/safehtml/uncheckedconversions"
2020
"github.com/microcosm-cc/bluemonday"
2121
"github.com/russross/blackfriday/v2"
22-
"github.com/yuin/goldmark"
23-
emoji "github.com/yuin/goldmark-emoji"
24-
"github.com/yuin/goldmark/extension"
25-
"github.com/yuin/goldmark/parser"
26-
"github.com/yuin/goldmark/renderer"
27-
goldmarkHtml "github.com/yuin/goldmark/renderer/html"
28-
"github.com/yuin/goldmark/text"
29-
"github.com/yuin/goldmark/util"
3022
"golang.org/x/net/html"
3123
"golang.org/x/net/html/atom"
3224
"golang.org/x/pkgsite/internal"
@@ -85,7 +77,7 @@ func constructOverviewDetails(ctx context.Context, mi *internal.ModuleInfo, read
8577
}
8678
if overview.Redistributable && readme != nil {
8779
overview.ReadMeSource = fileSource(mi.ModulePath, mi.Version, readme.Filepath)
88-
r, err := ReadmeHTML(ctx, mi, readme)
80+
r, err := LegacyReadmeHTML(ctx, mi, readme)
8981
if err != nil {
9082
return nil, err
9183
}
@@ -145,13 +137,14 @@ func blackfridayReadmeHTML(ctx context.Context, readme *internal.Readme, mi *int
145137
return sanitizeHTML(b), nil
146138
}
147139

148-
// ReadmeHTML sanitizes readmeContents based on bluemondy.UGCPolicy and returns
140+
// LegacyReadmeHTML sanitizes readmeContents based on bluemondy.UGCPolicy and returns
149141
// a safehtml.HTML. If readmeFilePath indicates that this is a markdown file,
150142
// it will also render the markdown contents using blackfriday.
151143
//
152-
// It is exported to support external testing.
153-
func ReadmeHTML(ctx context.Context, mi *internal.ModuleInfo, readme *internal.Readme) (_ safehtml.HTML, err error) {
154-
defer derrors.Wrap(&err, "readmeHTML(%s@%s)", mi.ModulePath, mi.Version)
144+
// This function is exported for use in an external tool that uses this package to
145+
// compare readme files to see how changes in processing will affect them.
146+
func LegacyReadmeHTML(ctx context.Context, mi *internal.ModuleInfo, readme *internal.Readme) (_ safehtml.HTML, err error) {
147+
defer derrors.Wrap(&err, "LegacyReadmeHTML(%s@%s)", mi.ModulePath, mi.Version)
155148
if readme == nil || readme.Contents == "" {
156149
return safehtml.HTML{}, nil
157150
}
@@ -164,78 +157,9 @@ func ReadmeHTML(ctx context.Context, mi *internal.ModuleInfo, readme *internal.R
164157
return h, nil
165158
}
166159

167-
if experiment.IsActive(ctx, internal.ExperimentGoldmark) {
168-
// Sets priority value so that we always use our custom transformer
169-
// instead of the default ones. The default values are in:
170-
// https://github.com/yuin/goldmark/blob/7b90f04af43131db79ec320be0bd4744079b346f/parser/parser.go#L567
171-
const ASTTransformerPriority = 10000
172-
gdMarkdown := goldmark.New(
173-
goldmark.WithParserOptions(
174-
// WithHeadingAttribute allows us to include other attributes in
175-
// heading tags. This is useful for our aria-level implementation of
176-
// increasing heading rankings.
177-
parser.WithHeadingAttribute(),
178-
// Generates an id in every heading tag. This is used in github in
179-
// order to generate a link with a hash that a user would scroll to
180-
// <h1 id="goldmark">goldmark</h1> => github.com/yuin/goldmark#goldmark
181-
parser.WithAutoHeadingID(),
182-
// Include custom ASTTransformer using the readme and module info to
183-
// use translateRelativeLink and translateHTML to modify the AST
184-
// before it is rendered.
185-
parser.WithASTTransformers(util.Prioritized(&ASTTransformer{
186-
info: mi.SourceInfo,
187-
readme: readme,
188-
}, ASTTransformerPriority)),
189-
),
190-
// These extensions lets users write HTML code in the README. This is
191-
// fine since we process the contents using bluemonday after.
192-
goldmark.WithRendererOptions(goldmarkHtml.WithUnsafe(), goldmarkHtml.WithXHTML()),
193-
goldmark.WithExtensions(
194-
extension.GFM, // Support Github Flavored Markdown.
195-
emoji.Emoji, // Support Github markdown emoji markup.
196-
),
197-
)
198-
gdMarkdown.Renderer().AddOptions(
199-
renderer.WithNodeRenderers(
200-
util.Prioritized(NewHTMLRenderer(mi.SourceInfo, readme), 100),
201-
),
202-
)
203-
204-
var b bytes.Buffer
205-
contents := []byte(readme.Contents)
206-
gdRenderer := gdMarkdown.Renderer()
207-
gdParser := gdMarkdown.Parser()
208-
209-
reader := text.NewReader(contents)
210-
doc := gdParser.Parse(reader)
211-
212-
if err := gdRenderer.Render(&b, contents, doc); err != nil {
213-
return safehtml.HTML{}, nil
214-
}
215-
216-
return sanitizeGoldmarkHTML(&b), nil
217-
}
218160
return blackfridayReadmeHTML(ctx, readme, mi)
219161
}
220162

221-
// sanitizeGoldmarkHTML sanitizes HTML from a bytes.Buffer so that it is safe.
222-
func sanitizeGoldmarkHTML(b *bytes.Buffer) safehtml.HTML {
223-
p := bluemonday.UGCPolicy()
224-
225-
p.AllowAttrs("width", "align").OnElements("img")
226-
p.AllowAttrs("width", "align").OnElements("div")
227-
p.AllowAttrs("width", "align").OnElements("p")
228-
// Allow accessible headings (i.e <div role="heading" aria-level="7">).
229-
p.AllowAttrs("width", "align", "role", "aria-level").OnElements("div")
230-
for _, h := range []string{"h1", "h2", "h3", "h4", "h5", "h6"} {
231-
// Needed to preserve github styles heading font-sizes
232-
p.AllowAttrs("class").OnElements(h)
233-
}
234-
235-
s := string(p.SanitizeBytes(b.Bytes()))
236-
return uncheckedconversions.HTMLFromStringKnownToSatisfyTypeContract(s)
237-
}
238-
239163
// sanitizeHTML reads HTML from r and sanitizes it to ensure it is safe.
240164
func sanitizeHTML(r io.Reader) safehtml.HTML {
241165
// bluemonday.UGCPolicy allows a broad selection of HTML elements and

internal/frontend/overview_test.go

Lines changed: 2 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -177,50 +177,6 @@ func TestPackageOverviewDetails(t *testing.T) {
177177
}
178178
}
179179

180-
func TestGoldmarkReadmeHTML(t *testing.T) {
181-
ctx := experiment.NewContext(context.Background(), internal.ExperimentGoldmark)
182-
mod := &internal.ModuleInfo{
183-
Version: sample.VersionString,
184-
SourceInfo: source.NewGitHubInfo(sample.ModulePath, "", "v1.2.3"),
185-
}
186-
for _, tc := range []struct {
187-
name string
188-
mi *internal.ModuleInfo
189-
readme *internal.Readme
190-
want string
191-
}{
192-
{
193-
name: "Top level heading is h3 from ####, and following header levels become hN-1",
194-
mi: mod,
195-
readme: &internal.Readme{
196-
Filepath: sample.ReadmeFilePath,
197-
Contents: "#### Heading Rank 4\n\n##### Heading Rank 5",
198-
},
199-
want: "<h3 class=\"h4\" id=\"heading-rank-4\">Heading Rank 4</h3>\n<h4 class=\"h5\" id=\"heading-rank-5\">Heading Rank 5</h4>",
200-
},
201-
{
202-
name: "Github markdown emoji markup is properly rendered",
203-
mi: mod,
204-
readme: &internal.Readme{
205-
Filepath: sample.ReadmeFilePath,
206-
Contents: "# :zap: Zap \n\n :joy:",
207-
},
208-
want: "<h3 class=\"h1\" id=\"zap-zap\">⚡ Zap</h3>\n<p>😂</p>",
209-
},
210-
} {
211-
t.Run(tc.name, func(t *testing.T) {
212-
hgot, err := ReadmeHTML(ctx, tc.mi, tc.readme)
213-
if err != nil {
214-
t.Fatal(err)
215-
}
216-
got := strings.TrimSpace(hgot.String())
217-
if diff := cmp.Diff(tc.want, got); diff != "" {
218-
t.Errorf("readmeHTML(%v) mismatch (-want +got):\n%s", tc.mi, diff)
219-
}
220-
})
221-
}
222-
}
223-
224180
func TestBlackfridayReadmeHTML(t *testing.T) {
225181
ctx := context.Background()
226182
aModule := &internal.ModuleInfo{
@@ -411,13 +367,13 @@ func TestBlackfridayReadmeHTML(t *testing.T) {
411367
},
412368
}
413369
checkReadme := func(ctx context.Context, t *testing.T, mi *internal.ModuleInfo, readme *internal.Readme, want string) {
414-
hgot, err := ReadmeHTML(ctx, mi, readme)
370+
hgot, err := LegacyReadmeHTML(ctx, mi, readme)
415371
if err != nil {
416372
t.Fatal(err)
417373
}
418374
got := strings.TrimSpace(hgot.String())
419375
if diff := cmp.Diff(want, got); diff != "" {
420-
t.Errorf("readmeHTML(%v) mismatch (-want +got):\n%s", mi, diff)
376+
t.Errorf("LegacyReadmeHTML(%v) mismatch (-want +got):\n%s", mi, diff)
421377
}
422378
}
423379
for _, test := range tests {

internal/frontend/readme.go

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
// Copyright 2020 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package frontend
6+
7+
import (
8+
"bytes"
9+
"context"
10+
11+
"github.com/google/safehtml"
12+
"github.com/google/safehtml/template"
13+
"github.com/google/safehtml/uncheckedconversions"
14+
"github.com/microcosm-cc/bluemonday"
15+
"github.com/yuin/goldmark"
16+
emoji "github.com/yuin/goldmark-emoji"
17+
"github.com/yuin/goldmark/extension"
18+
"github.com/yuin/goldmark/parser"
19+
"github.com/yuin/goldmark/renderer"
20+
goldmarkHtml "github.com/yuin/goldmark/renderer/html"
21+
"github.com/yuin/goldmark/text"
22+
"github.com/yuin/goldmark/util"
23+
"golang.org/x/pkgsite/internal"
24+
"golang.org/x/pkgsite/internal/derrors"
25+
)
26+
27+
// ReadmeHTML sanitizes readmeContents based on bluemondy.UGCPolicy and returns
28+
// a safehtml.HTML. If readmeFilePath indicates that this is a markdown file,
29+
// it will also render the markdown contents using goldmark.
30+
//
31+
// This function is exported for use in an external tool that uses this package to
32+
// compare readme files to see how changes in processing will affect them.
33+
func ReadmeHTML(ctx context.Context, mi *internal.ModuleInfo, readme *internal.Readme) (_ safehtml.HTML, err error) {
34+
defer derrors.Wrap(&err, "ReadmeHTML(%s@%s)", mi.ModulePath, mi.Version)
35+
if readme == nil || readme.Contents == "" {
36+
return safehtml.HTML{}, nil
37+
}
38+
if !isMarkdown(readme.Filepath) {
39+
t := template.Must(template.New("").Parse(`<pre class="readme">{{.}}</pre>`))
40+
h, err := t.ExecuteToHTML(readme.Contents)
41+
if err != nil {
42+
return safehtml.HTML{}, err
43+
}
44+
return h, nil
45+
}
46+
47+
// Sets priority value so that we always use our custom transformer
48+
// instead of the default ones. The default values are in:
49+
// https://github.com/yuin/goldmark/blob/7b90f04af43131db79ec320be0bd4744079b346f/parser/parser.go#L567
50+
const ASTTransformerPriority = 10000
51+
gdMarkdown := goldmark.New(
52+
goldmark.WithParserOptions(
53+
// WithHeadingAttribute allows us to include other attributes in
54+
// heading tags. This is useful for our aria-level implementation of
55+
// increasing heading rankings.
56+
parser.WithHeadingAttribute(),
57+
// Generates an id in every heading tag. This is used in github in
58+
// order to generate a link with a hash that a user would scroll to
59+
// <h1 id="goldmark">goldmark</h1> => github.com/yuin/goldmark#goldmark
60+
parser.WithAutoHeadingID(),
61+
// Include custom ASTTransformer using the readme and module info to
62+
// use translateRelativeLink and translateHTML to modify the AST
63+
// before it is rendered.
64+
parser.WithASTTransformers(util.Prioritized(&ASTTransformer{
65+
info: mi.SourceInfo,
66+
readme: readme,
67+
}, ASTTransformerPriority)),
68+
),
69+
// These extensions lets users write HTML code in the README. This is
70+
// fine since we process the contents using bluemonday after.
71+
goldmark.WithRendererOptions(goldmarkHtml.WithUnsafe(), goldmarkHtml.WithXHTML()),
72+
goldmark.WithExtensions(
73+
extension.GFM, // Support Github Flavored Markdown.
74+
emoji.Emoji, // Support Github markdown emoji markup.
75+
),
76+
)
77+
gdMarkdown.Renderer().AddOptions(
78+
renderer.WithNodeRenderers(
79+
util.Prioritized(NewHTMLRenderer(mi.SourceInfo, readme), 100),
80+
),
81+
)
82+
83+
var b bytes.Buffer
84+
contents := []byte(readme.Contents)
85+
gdRenderer := gdMarkdown.Renderer()
86+
gdParser := gdMarkdown.Parser()
87+
88+
reader := text.NewReader(contents)
89+
doc := gdParser.Parse(reader)
90+
91+
if err := gdRenderer.Render(&b, contents, doc); err != nil {
92+
return safehtml.HTML{}, nil
93+
}
94+
return sanitizeGoldmarkHTML(&b), nil
95+
}
96+
97+
// sanitizeGoldmarkHTML sanitizes HTML from a bytes.Buffer so that it is safe.
98+
func sanitizeGoldmarkHTML(b *bytes.Buffer) safehtml.HTML {
99+
p := bluemonday.UGCPolicy()
100+
101+
p.AllowAttrs("width", "align").OnElements("img")
102+
p.AllowAttrs("width", "align").OnElements("div")
103+
p.AllowAttrs("width", "align").OnElements("p")
104+
// Allow accessible headings (i.e <div role="heading" aria-level="7">).
105+
p.AllowAttrs("width", "align", "role", "aria-level").OnElements("div")
106+
for _, h := range []string{"h1", "h2", "h3", "h4", "h5", "h6"} {
107+
// Needed to preserve github styles heading font-sizes
108+
p.AllowAttrs("class").OnElements(h)
109+
}
110+
111+
s := string(p.SanitizeBytes(b.Bytes()))
112+
return uncheckedconversions.HTMLFromStringKnownToSatisfyTypeContract(s)
113+
}

internal/frontend/readme_test.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// Copyright 2020 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package frontend
6+
7+
import (
8+
"context"
9+
"strings"
10+
"testing"
11+
12+
"github.com/google/go-cmp/cmp"
13+
"golang.org/x/pkgsite/internal"
14+
"golang.org/x/pkgsite/internal/experiment"
15+
"golang.org/x/pkgsite/internal/source"
16+
"golang.org/x/pkgsite/internal/testing/sample"
17+
)
18+
19+
func TestGoldmarkReadmeHTML(t *testing.T) {
20+
ctx := experiment.NewContext(context.Background(), internal.ExperimentGoldmark)
21+
mod := &internal.ModuleInfo{
22+
Version: sample.VersionString,
23+
SourceInfo: source.NewGitHubInfo(sample.ModulePath, "", sample.VersionString),
24+
}
25+
for _, tc := range []struct {
26+
name string
27+
mi *internal.ModuleInfo
28+
readme *internal.Readme
29+
want string
30+
}{
31+
{
32+
name: "Top level heading is h3 from ####, and following header levels become hN-1",
33+
mi: mod,
34+
readme: &internal.Readme{
35+
Filepath: sample.ReadmeFilePath,
36+
Contents: "#### Heading Rank 4\n\n##### Heading Rank 5",
37+
},
38+
want: "<h3 class=\"h4\" id=\"heading-rank-4\">Heading Rank 4</h3>\n<h4 class=\"h5\" id=\"heading-rank-5\">Heading Rank 5</h4>",
39+
},
40+
{
41+
name: "Github markdown emoji markup is properly rendered",
42+
mi: mod,
43+
readme: &internal.Readme{
44+
Filepath: sample.ReadmeFilePath,
45+
Contents: "# :zap: Zap \n\n :joy:",
46+
},
47+
want: "<h3 class=\"h1\" id=\"zap-zap\">⚡ Zap</h3>\n<p>😂</p>",
48+
},
49+
} {
50+
t.Run(tc.name, func(t *testing.T) {
51+
hgot, err := ReadmeHTML(ctx, tc.mi, tc.readme)
52+
if err != nil {
53+
t.Fatal(err)
54+
}
55+
got := strings.TrimSpace(hgot.String())
56+
if diff := cmp.Diff(tc.want, got); diff != "" {
57+
t.Errorf("ReadmeHTML(%v) mismatch (-want +got):\n%s", tc.mi, diff)
58+
}
59+
})
60+
}
61+
}

internal/frontend/unit_main.go

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -170,18 +170,22 @@ func moduleInfo(um *internal.UnitMeta) *internal.ModuleInfo {
170170
}
171171

172172
// readmeContent renders the readme to html.
173-
func readmeContent(ctx context.Context, um *internal.UnitMeta, readme *internal.Readme) (safehtml.HTML, error) {
173+
func readmeContent(ctx context.Context, um *internal.UnitMeta, readme *internal.Readme) (_ safehtml.HTML, err error) {
174174
defer middleware.ElapsedStat(ctx, "readmeContent")()
175-
176-
if um.IsRedistributable && readme != nil {
177-
mi := moduleInfo(um)
178-
readme, err := ReadmeHTML(ctx, mi, readme)
179-
if err != nil {
180-
return safehtml.HTML{}, err
181-
}
182-
return readme, nil
175+
if !um.IsRedistributable || readme == nil {
176+
return safehtml.HTML{}, nil
177+
}
178+
mi := moduleInfo(um)
179+
var readmeHTML safehtml.HTML
180+
if experiment.IsActive(ctx, internal.ExperimentGoldmark) {
181+
readmeHTML, err = ReadmeHTML(ctx, mi, readme)
182+
} else {
183+
readmeHTML, err = LegacyReadmeHTML(ctx, mi, readme)
184+
}
185+
if err != nil {
186+
return safehtml.HTML{}, err
183187
}
184-
return safehtml.HTML{}, nil
188+
return readmeHTML, nil
185189
}
186190

187191
func getNestedModules(ctx context.Context, ds internal.DataSource, um *internal.UnitMeta) ([]*NestedModule, error) {

0 commit comments

Comments
 (0)