From 23e7d33bb67e62352787950c080716e0d7e10cc5 Mon Sep 17 00:00:00 2001 From: akuma06 Date: Tue, 23 May 2017 22:09:20 +0200 Subject: [PATCH 1/2] Fixing insert of unneeded html tags in db Added a Sanitize function in util * Possibility to add model in it * Already a preset default model Comments shouldn't be allowed html, too difficult to check every comment for broken html Torrents are still allowed html tags but I don't think it should since we use markdown. --- router/upload.go | 7 +++---- router/view_torrent_handler.go | 3 ++- util/markdown.go | 21 +++++++++++++++++++++ 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/router/upload.go b/router/upload.go index a51f8da4..9643cf73 100644 --- a/router/upload.go +++ b/router/upload.go @@ -20,7 +20,6 @@ import ( "github.com/NyaaPantsu/nyaa/util" "github.com/NyaaPantsu/nyaa/util/categories" "github.com/NyaaPantsu/nyaa/util/metainfo" - "github.com/microcosm-cc/bluemonday" "github.com/zeebo/bencode" ) @@ -83,7 +82,7 @@ var ErrInvalidWebsiteLink = errors.New("Website url or IRC link is invalid") // error indicating a torrent's category is invalid var ErrInvalidTorrentCategory = errors.New("Torrent category is invalid") -var p = bluemonday.UGCPolicy() +// var p = bluemonday.UGCPolicy() /** UploadForm.ExtractInfo takes an http request and computes all fields for this form @@ -100,7 +99,7 @@ func (f *UploadForm) ExtractInfo(r *http.Request) error { // trim whitespace f.Name = util.TrimWhitespaces(f.Name) - f.Description = p.Sanitize(util.TrimWhitespaces(f.Description)) + f.Description = util.Sanitize(util.TrimWhitespaces(f.Description), "default") f.WebsiteLink = util.TrimWhitespaces(f.WebsiteLink) f.Magnet = util.TrimWhitespaces(f.Magnet) cache.Impl.ClearAll() @@ -241,7 +240,7 @@ func (f *UploadForm) ExtractEditInfo(r *http.Request) error { // trim whitespace f.Name = util.TrimWhitespaces(f.Name) - f.Description = p.Sanitize(util.TrimWhitespaces(f.Description)) + f.Description = util.Sanitize(util.TrimWhitespaces(f.Description), "default") catsSplit := strings.Split(f.Category, "_") // need this to prevent out of index panics diff --git a/router/view_torrent_handler.go b/router/view_torrent_handler.go index 96cf3bbd..23da2a48 100644 --- a/router/view_torrent_handler.go +++ b/router/view_torrent_handler.go @@ -15,6 +15,7 @@ import ( "github.com/NyaaPantsu/nyaa/service/report" "github.com/NyaaPantsu/nyaa/service/torrent" "github.com/NyaaPantsu/nyaa/service/user/permission" + "github.com/NyaaPantsu/nyaa/util" "github.com/NyaaPantsu/nyaa/util/languages" "github.com/NyaaPantsu/nyaa/util/log" msg "github.com/NyaaPantsu/nyaa/util/messages" @@ -90,7 +91,7 @@ func PostCommentHandler(w http.ResponseWriter, r *http.Request) { messages.AddErrorT("errors", "bad_captcha") } } - content := p.Sanitize(r.FormValue("comment")) + content := util.Sanitize(r.FormValue("comment"), "") if strings.TrimSpace(content) == "" { messages.AddErrorT("errors", "comment_empty") diff --git a/util/markdown.go b/util/markdown.go index 913b6525..71c2d0e5 100644 --- a/util/markdown.go +++ b/util/markdown.go @@ -38,3 +38,24 @@ func MarkdownToHTML(markdown string) template.HTML { html := bluemonday.UGCPolicy().SanitizeBytes(unsafe) return template.HTML(html) } + + +/* + * Sanitize a message passed as a string according to a setted model or allowing a set of html tags and output a string + */ +func Sanitize(msg string, elements ...string) string { + p := bluemonday.StrictPolicy() + if len(elements) > 0 { + if elements[0] == "default" { // default model + p.AllowElements("b", "strong", "em", "i", "u", "blockquote", "q") + p.AllowImages() + p.AllowStandardURLs() + p.AllowAttrs("cite").OnElements("blockquote", "q") + p.AllowAttrs("href").OnElements("a") + p.AddTargetBlankToFullyQualifiedLinks(true) + } else { // allowing set of html tags + p.AllowElements(elements...) + } + } + return p.Sanitize(msg) +} \ No newline at end of file From 21ac12c852efeb677e9bb028093237fe67b4f01f Mon Sep 17 00:00:00 2001 From: akuma06 Date: Wed, 24 May 2017 00:08:02 +0200 Subject: [PATCH 2/2] Some updates * Added support of UGC without div tag for torrents * Added support of basic html tags for comments (a, img, b, em, u) Fixed: * Bug with r *request becoming a nul pointer when loading from contextin messages.go --- router/view_torrent_handler.go | 2 +- service/user/cookie_helper.go | 5 +- util/markdown.go | 225 ++++++++++++++++++++++++++++++++- util/messages/messages.go | 6 +- 4 files changed, 230 insertions(+), 8 deletions(-) diff --git a/router/view_torrent_handler.go b/router/view_torrent_handler.go index 23da2a48..32d586ff 100644 --- a/router/view_torrent_handler.go +++ b/router/view_torrent_handler.go @@ -91,7 +91,7 @@ func PostCommentHandler(w http.ResponseWriter, r *http.Request) { messages.AddErrorT("errors", "bad_captcha") } } - content := util.Sanitize(r.FormValue("comment"), "") + content := util.Sanitize(r.FormValue("comment"), "comment") if strings.TrimSpace(content) == "" { messages.AddErrorT("errors", "comment_empty") diff --git a/service/user/cookie_helper.go b/service/user/cookie_helper.go index d4c2113e..af899d5c 100644 --- a/service/user/cookie_helper.go +++ b/service/user/cookie_helper.go @@ -11,6 +11,7 @@ import ( "github.com/gorilla/context" "github.com/gorilla/securecookie" "golang.org/x/crypto/bcrypt" + "fmt" "net/http" "strconv" "time" @@ -123,7 +124,9 @@ func RegisterHandler(w http.ResponseWriter, r *http.Request) (int, error) { func CurrentUser(r *http.Request) (model.User, error) { var user model.User var encoded string - + if r == nil { + fmt.Println("ERROR r is terminated") + } encoded = r.Header.Get("X-Auth-Token") if len(encoded) == 0 { // check cookie instead diff --git a/util/markdown.go b/util/markdown.go index 71c2d0e5..5c20a949 100644 --- a/util/markdown.go +++ b/util/markdown.go @@ -1,12 +1,17 @@ package util import ( + "bytes" + "html/template" + "log" + "regexp" + "strings" + "github.com/microcosm-cc/bluemonday" md "github.com/russross/blackfriday" - - "html/template" - "strings" + "golang.org/x/net/html" ) + //Some default rules, plus and minus some. var mdOptions = 0 | md.EXTENSION_AUTOLINK | @@ -44,9 +49,204 @@ func MarkdownToHTML(markdown string) template.HTML { * Sanitize a message passed as a string according to a setted model or allowing a set of html tags and output a string */ func Sanitize(msg string, elements ...string) string { - p := bluemonday.StrictPolicy() + msg = repairHTMLTags(msg) // We repair possible broken html tags + p := bluemonday.NewPolicy() if len(elements) > 0 { - if elements[0] == "default" { // default model + if elements[0] == "default" { // default model same as UGC without div + /////////////////////// + // Global attributes // + /////////////////////// + + // "class" is not permitted as we are not allowing users to style their own + // content + + p.AllowStandardAttributes() + + ////////////////////////////// + // Global URL format policy // + ////////////////////////////// + + p.AllowStandardURLs() + + //////////////////////////////// + // Declarations and structure // + //////////////////////////////// + + // "xml" "xslt" "DOCTYPE" "html" "head" are not permitted as we are + // expecting user generated content to be a fragment of HTML and not a full + // document. + + ////////////////////////// + // Sectioning root tags // + ////////////////////////// + + // "article" and "aside" are permitted and takes no attributes + p.AllowElements("article", "aside") + + // "body" is not permitted as we are expecting user generated content to be a fragment + // of HTML and not a full document. + + // "details" is permitted, including the "open" attribute which can either + // be blank or the value "open". + p.AllowAttrs( + "open", + ).Matching(regexp.MustCompile(`(?i)^(|open)$`)).OnElements("details") + + // "fieldset" is not permitted as we are not allowing forms to be created. + + // "figure" is permitted and takes no attributes + p.AllowElements("figure") + + // "nav" is not permitted as it is assumed that the site (and not the user) + // has defined navigation elements + + // "section" is permitted and takes no attributes + p.AllowElements("section") + + // "summary" is permitted and takes no attributes + p.AllowElements("summary") + + ////////////////////////// + // Headings and footers // + ////////////////////////// + + // "footer" is not permitted as we expect user content to be a fragment and + // not structural to this extent + + // "h1" through "h6" are permitted and take no attributes + p.AllowElements("h1", "h2", "h3", "h4", "h5", "h6") + + // "header" is not permitted as we expect user content to be a fragment and + // not structural to this extent + + // "hgroup" is permitted and takes no attributes + p.AllowElements("hgroup") + + ///////////////////////////////////// + // Content grouping and separating // + ///////////////////////////////////// + + // "blockquote" is permitted, including the "cite" attribute which must be + // a standard URL. + p.AllowAttrs("cite").OnElements("blockquote") + + // "br" "div" "hr" "p" "span" "wbr" are permitted and take no attributes + p.AllowElements("br", "hr", "p", "span", "wbr") + + /////////// + // Links // + /////////// + + // "a" is permitted + p.AllowAttrs("href").OnElements("a") + + // "area" is permitted along with the attributes that map image maps work + p.AllowAttrs("name").Matching( + regexp.MustCompile(`^([\p{L}\p{N}_-]+)$`), + ).OnElements("map") + p.AllowAttrs("alt").Matching(bluemonday.Paragraph).OnElements("area") + p.AllowAttrs("coords").Matching( + regexp.MustCompile(`^([0-9]+,)+[0-9]+$`), + ).OnElements("area") + p.AllowAttrs("href").OnElements("area") + p.AllowAttrs("rel").Matching(bluemonday.SpaceSeparatedTokens).OnElements("area") + p.AllowAttrs("shape").Matching( + regexp.MustCompile(`(?i)^(default|circle|rect|poly)$`), + ).OnElements("area") + p.AllowAttrs("usemap").Matching( + regexp.MustCompile(`(?i)^#[\p{L}\p{N}_-]+$`), + ).OnElements("img") + + // "link" is not permitted + + ///////////////////// + // Phrase elements // + ///////////////////// + + // The following are all inline phrasing elements + p.AllowElements("abbr", "acronym", "cite", "code", "dfn", "em", + "figcaption", "mark", "s", "samp", "strong", "sub", "sup", "var") + + // "q" is permitted and "cite" is a URL and handled by URL policies + p.AllowAttrs("cite").OnElements("q") + + // "time" is permitted + p.AllowAttrs("datetime").Matching(bluemonday.ISO8601).OnElements("time") + + //////////////////// + // Style elements // + //////////////////// + + // block and inline elements that impart no semantic meaning but style the + // document + p.AllowElements("b", "i", "pre", "small", "strike", "tt", "u") + + // "style" is not permitted as we are not yet sanitising CSS and it is an + // XSS attack vector + + ////////////////////// + // HTML5 Formatting // + ////////////////////// + + // "bdi" "bdo" are permitted + p.AllowAttrs("dir").Matching(bluemonday.Direction).OnElements("bdi", "bdo") + + // "rp" "rt" "ruby" are permitted + p.AllowElements("rp", "rt", "ruby") + + /////////////////////////// + // HTML5 Change tracking // + /////////////////////////// + + // "del" "ins" are permitted + p.AllowAttrs("cite").Matching(bluemonday.Paragraph).OnElements("del", "ins") + p.AllowAttrs("datetime").Matching(bluemonday.ISO8601).OnElements("del", "ins") + + /////////// + // Lists // + /////////// + + p.AllowLists() + + //////////// + // Tables // + //////////// + + p.AllowTables() + + /////////// + // Forms // + /////////// + + // By and large, forms are not permitted. However there are some form + // elements that can be used to present data, and we do permit those + // + // "button" "fieldset" "input" "keygen" "label" "output" "select" "datalist" + // "textarea" "optgroup" "option" are all not permitted + + // "meter" is permitted + p.AllowAttrs( + "value", + "min", + "max", + "low", + "high", + "optimum", + ).Matching(bluemonday.Number).OnElements("meter") + + // "progress" is permitted + p.AllowAttrs("value", "max").Matching(bluemonday.Number).OnElements("progress") + + ////////////////////// + // Embedded content // + ////////////////////// + + // Vast majority not permitted + // "audio" "canvas" "embed" "iframe" "object" "param" "source" "svg" "track" + // "video" are all not permitted + + p.AllowImages() + } else if elements[0] == "comment" { p.AllowElements("b", "strong", "em", "i", "u", "blockquote", "q") p.AllowImages() p.AllowStandardURLs() @@ -58,4 +258,19 @@ func Sanitize(msg string, elements ...string) string { } } return p.Sanitize(msg) +} + +/* + * Should close any opened tags and strip any empty end tags + */ +func repairHTMLTags(brokenHtml string) string { + reader := strings.NewReader(brokenHtml) + root, err := html.Parse(reader) + if err != nil { + log.Fatal(err) + } + var b bytes.Buffer + html.Render(&b, root) + fixedHTML := b.String() + return fixedHTML } \ No newline at end of file diff --git a/util/messages/messages.go b/util/messages/messages.go index 8344048f..3cca4103 100644 --- a/util/messages/messages.go +++ b/util/messages/messages.go @@ -18,7 +18,11 @@ type Messages struct { func GetMessages(r *http.Request) *Messages { if rv := context.Get(r, MessagesKey); rv != nil { - return rv.(*Messages) + mes := rv.(*Messages) + T, _ := languages.GetTfuncAndLanguageFromRequest(r) + mes.T = T + mes.r = r + return mes } else { context.Set(r, MessagesKey, &Messages{}) T, _ := languages.GetTfuncAndLanguageFromRequest(r)