sanitizer.go 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. package markup
  2. import (
  3. "net/url"
  4. "strings"
  5. "sync"
  6. "github.com/microcosm-cc/bluemonday"
  7. "gogs.io/gogs/internal/conf"
  8. "gogs.io/gogs/internal/lazyregexp"
  9. )
  10. // Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow
  11. // any modification to the underlying policies once it's been created.
  12. type Sanitizer struct {
  13. policy *bluemonday.Policy
  14. init sync.Once
  15. }
  16. var sanitizer = &Sanitizer{
  17. policy: bluemonday.UGCPolicy(),
  18. }
  19. // NewSanitizer initializes sanitizer with allowed attributes based on settings.
  20. // Multiple calls to this function will only create one instance of Sanitizer during
  21. // entire application lifecycle.
  22. func NewSanitizer() {
  23. sanitizer.init.Do(func() {
  24. // We only want to allow HighlightJS specific classes for code blocks
  25. sanitizer.policy.AllowAttrs("class").Matching(lazyregexp.New(`^language-\w+$`).Regexp()).OnElements("code")
  26. // Checkboxes
  27. sanitizer.policy.AllowAttrs("type").Matching(lazyregexp.New(`^checkbox$`).Regexp()).OnElements("input")
  28. sanitizer.policy.AllowAttrs("checked", "disabled").OnElements("input")
  29. // Only allow data URIs with safe image MIME types to prevent XSS via
  30. // "data:text/html" payloads.
  31. sanitizer.policy.AllowURLSchemeWithCustomPolicy("data", isSafeDataURI)
  32. // Custom URL-Schemes
  33. sanitizer.policy.AllowURLSchemes(conf.Markdown.CustomURLSchemes...)
  34. })
  35. }
  // isSafeDataURI reports whether the data URI u declares one of the permitted
  // image MIME types.
  func isSafeDataURI(u *url.URL) bool {
  	// The opaque part of a data URI reads "mediatype;base64,data" or
  	// "mediatype,data", so the media type ends at the first ';' or ','.
  	mime := u.Opaque
  	if end := strings.IndexAny(mime, ";,"); end >= 0 {
  		mime = mime[:end]
  	}
  	mime = strings.TrimSpace(strings.ToLower(mime))

  	// Only a fixed set of common image types is accepted; everything else,
  	// including an empty media type, is rejected.
  	switch mime {
  	case "image/png", "image/jpeg", "image/gif", "image/webp", "image/x-icon":
  		return true
  	default:
  		return false
  	}
  }
  48. // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist.
  49. func Sanitize(s string) string {
  50. return sanitizer.policy.Sanitize(s)
  51. }
  52. // SanitizeBytes takes a []byte slice that contains a HTML fragment or document and applies policy whitelist.
  53. func SanitizeBytes(b []byte) []byte {
  54. return sanitizer.policy.SanitizeBytes(b)
  55. }