فهرست منبع

markup: restrict data URI scheme to safe image MIME types (#8174)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
ᴊᴏᴇ ᴄʜᴇɴ 2 روز پیش
والد
کامیت
441c64d7bd
2 فایلهای تغییر یافته به همراه 32 افزوده شده و 2 حذف شده
  1. 18 2
      internal/markup/sanitizer.go
  2. 14 0
      internal/markup/sanitizer_test.go

+ 18 - 2
internal/markup/sanitizer.go

@@ -1,6 +1,8 @@
 package markup
 
 import (
+	"net/url"
+	"strings"
 	"sync"
 
 	"github.com/microcosm-cc/bluemonday"
@@ -32,14 +34,28 @@ func NewSanitizer() {
 		sanitizer.policy.AllowAttrs("type").Matching(lazyregexp.New(`^checkbox$`).Regexp()).OnElements("input")
 		sanitizer.policy.AllowAttrs("checked", "disabled").OnElements("input")
 
-		// Data URLs
-		sanitizer.policy.AllowURLSchemes("data")
+		// Only allow data URIs with safe image MIME types to prevent XSS via
+		// "data:text/html" payloads.
+		sanitizer.policy.AllowURLSchemeWithCustomPolicy("data", isSafeDataURI)
 
 		// Custom URL-Schemes
 		sanitizer.policy.AllowURLSchemes(conf.Markdown.CustomURLSchemes...)
 	})
 }
 
+// isSafeDataURI reports whether the data URI u declares one of the allowlisted image MIME types.
+func isSafeDataURI(u *url.URL) bool {
+	// The opaque part of a data URI has the form "mediatype;base64,data" or
+	// "mediatype,data"; cutting at the first ";" and then the first "," isolates the media type.
+	mediatype, _, _ := strings.Cut(u.Opaque, ";")
+	mediatype, _, _ = strings.Cut(mediatype, ",")
+	switch strings.TrimSpace(strings.ToLower(mediatype)) { // compare case-insensitively, ignoring surrounding whitespace
+	case "image/png", "image/jpeg", "image/gif", "image/webp", "image/x-icon":
+		return true
+	}
+	return false // any other media type, including an empty one, is rejected
+}
+
 // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist.
 func Sanitize(s string) string {
 	return sanitizer.policy.Sanitize(s)

+ 14 - 0
internal/markup/sanitizer_test.go

@@ -26,6 +26,20 @@ func Test_Sanitizer(t *testing.T) {
 		{input: `<input type="hidden">`, expVal: ``},
 		{input: `<input type="checkbox">`, expVal: `<input type="checkbox">`},
 		{input: `<input checked disabled autofocus>`, expVal: `<input checked="" disabled="">`},
+
+		// Data URIs: safe image types should be allowed
+		{input: `<img src="">`, expVal: `<img src="">`},
+		{input: `<img src="">`, expVal: `<img src="">`},
+		{input: `<img src="">`, expVal: `<img src="">`},
+		{input: `<img src="">`, expVal: `<img src="">`},
+
+		// Data URIs: text/html must be stripped to prevent XSS (GHSA-xrcr-gmf5-2r8j)
+		{input: `<a href="data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">Click</a>`, expVal: `Click`},
+		{input: `<a href="data:text/html,<script>alert(1)</script>">XSS</a>`, expVal: `XSS`},
+		{input: `<img src="data:text/html;base64,abc">`, expVal: ``},
+
+		// Data URIs: SVG must be stripped (can contain embedded JavaScript)
+		{input: `<img src="">`, expVal: ``},
 	}
 	for _, test := range tests {
 		t.Run(test.input, func(t *testing.T) {