From 58e779affa1d50f0b5a468db1d6a79ee2aa947c1 Mon Sep 17 00:00:00 2001 From: Alexander Scheel Date: Sun, 19 Apr 2020 09:46:34 -0400 Subject: [PATCH 1/6] Update documentation Signed-off-by: Alexander Scheel --- custom/conf/app.ini.sample | 6 ++++-- docs/content/doc/advanced/config-cheat-sheet.en-us.md | 8 ++++++-- docs/content/doc/advanced/external-renderers.en-us.md | 8 ++++++-- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/custom/conf/app.ini.sample b/custom/conf/app.ini.sample index 556f93a91daf1..d21feeba44143 100644 --- a/custom/conf/app.ini.sample +++ b/custom/conf/app.ini.sample @@ -963,8 +963,10 @@ SHOW_FOOTER_VERSION = true ; Show template execution time in the footer SHOW_FOOTER_TEMPLATE_LOAD_TIME = true -[markup.sanitizer] -; The following keys can be used multiple times to define sanitation policy rules. +[markup.sanitizer.1] +; The following keys can appear once to define a sanitation policy rule. +; This section can appear with an incremenented number to define multiple rules. +; e.g., [markup.sanitizer.1] -> [markup.sanitizer.2] ;ELEMENT = span ;ALLOW_ATTR = class ;REGEXP = ^(info|warning|error)$ diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index 00f086e5468e1..0fece774fd5a9 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -646,7 +646,7 @@ Two special environment variables are passed to the render command: Gitea supports customizing the sanitization policy for rendered HTML. The example below will support KaTeX output from pandoc. ```ini -[markup.sanitizer] +[markup.sanitizer.1] ; Pandoc renders TeX segments as s with the "math" class, optionally ; with "inline" or "display" classes depending on context. ELEMENT = span @@ -658,7 +658,11 @@ REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+ - `ALLOW_ATTR`: The attribute this policy allows. Must be non-empty. 
- `REGEXP`: A regex to match the contents of the attribute against. Must be present but may be empty for unconditional whitelisting of this attribute. -You may redefine `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` multiple times; each time all three are defined is a single policy entry. +You must define `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` in each numbered section. + +To define multiple entries, increment the number in the section (e.g., `[markup.sanitizer.1]` and `[markup.sanitizer.2]`). + +**Note**: The above section numbering policy is new; previously the section was `[markup.sanitizer]` and keys could be redefined. ## Time (`time`) diff --git a/docs/content/doc/advanced/external-renderers.en-us.md b/docs/content/doc/advanced/external-renderers.en-us.md index 2d8945d0052c4..5ec8e56e5bf1d 100644 --- a/docs/content/doc/advanced/external-renderers.en-us.md +++ b/docs/content/doc/advanced/external-renderers.en-us.md @@ -73,7 +73,7 @@ IS_INPUT_FILE = false If your external markup relies on additional classes and attributes on the generated HTML elements, you might need to enable custom sanitizer policies. Gitea uses the [`bluemonday`](https://godoc.org/github.com/microcosm-cc/bluemonday) package as our HTML sanitizier. The example below will support [KaTeX](https://katex.org/) output from [`pandoc`](https://pandoc.org/). ```ini -[markup.sanitizer] +[markup.sanitizer.1] ; Pandoc renders TeX segments as s with the "math" class, optionally ; with "inline" or "display" classes depending on context. ELEMENT = span @@ -86,6 +86,10 @@ FILE_EXTENSIONS = .md,.markdown RENDER_COMMAND = pandoc -f markdown -t html --katex ``` -You may redefine `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` multiple times; each time all three are defined is a single policy entry. All three must be defined, but `REGEXP` may be blank to allow unconditional whitelisting of that attribute. +You must define `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` in each numbered section. 
+ +To define multiple entries, increment the number in the section (e.g., `[markup.sanitizer.1]` and `[markup.sanitizer.2]`). Once your configuration changes have been made, restart Gitea to have changes take effect. + +**Note**: The above section numbering policy is new; previously the section was `[markup.sanitizer]` and keys could be redefined. From 4042adb7e45ce91a3443631db2a9d4b5221e08f5 Mon Sep 17 00:00:00 2001 From: Alexander Scheel Date: Sun, 19 Apr 2020 10:15:54 -0400 Subject: [PATCH 2/6] Add multiple rule support to markup sanitizer Resolves: 9888 Signed-off-by: Alexander Scheel --- modules/setting/markup.go | 58 +++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 33 deletions(-) diff --git a/modules/setting/markup.go b/modules/setting/markup.go index 75e6d651bdde2..1dd76243e6d6e 100644 --- a/modules/setting/markup.go +++ b/modules/setting/markup.go @@ -44,7 +44,7 @@ func newMarkup() { continue } - if name == "sanitizer" { + if name == "sanitizer" || strings.HasPrefix(name, "sanitizer.") { newMarkupSanitizer(name, sec) } else { newMarkupRenderer(name, sec) @@ -67,44 +67,36 @@ func newMarkupSanitizer(name string, sec *ini.Section) { return } - elements := sec.Key("ELEMENT").ValueWithShadows() - allowAttrs := sec.Key("ALLOW_ATTR").ValueWithShadows() - regexps := sec.Key("REGEXP").ValueWithShadows() + elements := sec.Key("ELEMENT").Value() + allowAttrs := sec.Key("ALLOW_ATTR").Value() + regexpStr := sec.Key("REGEXP").Value() - if len(elements) != len(allowAttrs) || - len(elements) != len(regexps) { - log.Error("All three keys in markup.%s (ELEMENT, ALLOW_ATTR, REGEXP) must be defined the same number of times! 
Got %d, %d, and %d respectively.", name, len(elements), len(allowAttrs), len(regexps)) + if regexpStr == "" { + rule := MarkupSanitizerRule{ + Element: elements, + AllowAttr: allowAttrs, + Regexp: nil, + } + + ExternalSanitizerRules = append(ExternalSanitizerRules, rule) return } - ExternalSanitizerRules = make([]MarkupSanitizerRule, 0, len(elements)) - - for index, pattern := range regexps { - if pattern == "" { - rule := MarkupSanitizerRule{ - Element: elements[index], - AllowAttr: allowAttrs[index], - Regexp: nil, - } - ExternalSanitizerRules = append(ExternalSanitizerRules, rule) - continue - } - - // Validate when parsing the config that this is a valid regular - // expression. Then we can use regexp.MustCompile(...) later. - compiled, err := regexp.Compile(pattern) - if err != nil { - log.Error("In module.%s: REGEXP at definition %d failed to compile: %v", name, index+1, err) - continue - } + // Validate when parsing the config that this is a valid regular + // expression. Then we can use regexp.MustCompile(...) later. 
+ compiled, err := regexp.Compile(regexpStr) + if err != nil { + log.Error("In module.%s: REGEXP (%s) at definition %d failed to compile: %v", regexpStr, name, err) + return + } - rule := MarkupSanitizerRule{ - Element: elements[index], - AllowAttr: allowAttrs[index], - Regexp: compiled, - } - ExternalSanitizerRules = append(ExternalSanitizerRules, rule) + rule := MarkupSanitizerRule{ + Element: elements, + AllowAttr: allowAttrs, + Regexp: compiled, } + + ExternalSanitizerRules = append(ExternalSanitizerRules, rule) } func newMarkupRenderer(name string, sec *ini.Section) { From 70941e97edf9f3b8f3ebe8aad6a7ea7feab4f111 Mon Sep 17 00:00:00 2001 From: Alexander Scheel Date: Mon, 20 Apr 2020 11:40:20 -0400 Subject: [PATCH 3/6] Clarify sanitizer documentation Signed-off-by: Alexander Scheel --- .../doc/advanced/config-cheat-sheet.en-us.md | 38 +++++++++++++++++-- .../doc/advanced/external-renderers.en-us.md | 7 ++-- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index 0fece774fd5a9..7e9a48e0daaa0 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -646,7 +646,7 @@ Two special environment variables are passed to the render command: Gitea supports customizing the sanitization policy for rendered HTML. The example below will support KaTeX output from pandoc. ```ini -[markup.sanitizer.1] +[markup.sanitizer.TeX] ; Pandoc renders TeX segments as s with the "math" class, optionally ; with "inline" or "display" classes depending on context. ELEMENT = span @@ -658,11 +658,41 @@ REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+ - `ALLOW_ATTR`: The attribute this policy allows. Must be non-empty. - `REGEXP`: A regex to match the contents of the attribute against. Must be present but may be empty for unconditional whitelisting of this attribute. 
-You must define `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` in each numbered section. +**Note**: The above section naming policy is new; previously the section was `[markup.sanitizer]` and keys could be redefined. +Now, a unique identifier must appear in the section name (e.g., `[markup.sanitizer.TeX]`) in order to parse multiple rules. +This was changed because the implementation with the ini parser used was flawed; the following configs were indistinguishable after parsing: -To define multiple entries, increment the number in the section (e.g., `[markup.sanitizer.1]` and `[markup.sanitizer.2]`). +```ini +[markup.sanitizer] +ELEMENT = a +ALLOW_ATTR = target +REGEXP = $1 +ELEMENT = a +ALLOW_ATTR = rel +REGEXP = $2 +ELEMENT = img +ALLOW_ATTR = src +REGEXP = $3 +``` + +and + +```ini +[markup.sanitizer] +ELEMENT = a +ALLOW_ATTR = target +REGEXP = $1 +ELEMENT = img +ALLOW_ATTR = rel +REGEXP = $2 +ELEMENT = img +ALLOW_ATTR = src +REGEXP = $3 +``` + +Because of limitations in the ini library, we are unable to automatically migrate configurations. -**Note**: The above section numbering policy is new; previously the section was `[markup.sanitizer]` and keys could be redefined. +We will still parse the first rule from a `[markup.sanitizer]` section if present, but multiple rules must be manually migrated. ## Time (`time`) diff --git a/docs/content/doc/advanced/external-renderers.en-us.md b/docs/content/doc/advanced/external-renderers.en-us.md index 5ec8e56e5bf1d..2cbf2943df6f7 100644 --- a/docs/content/doc/advanced/external-renderers.en-us.md +++ b/docs/content/doc/advanced/external-renderers.en-us.md @@ -73,7 +73,7 @@ IS_INPUT_FILE = false If your external markup relies on additional classes and attributes on the generated HTML elements, you might need to enable custom sanitizer policies. Gitea uses the [`bluemonday`](https://godoc.org/github.com/microcosm-cc/bluemonday) package as our HTML sanitizier. 
The example below will support [KaTeX](https://katex.org/) output from [`pandoc`](https://pandoc.org/). ```ini -[markup.sanitizer.1] +[markup.sanitizer.TeX] ; Pandoc renders TeX segments as s with the "math" class, optionally ; with "inline" or "display" classes depending on context. ELEMENT = span @@ -86,9 +86,10 @@ FILE_EXTENSIONS = .md,.markdown RENDER_COMMAND = pandoc -f markdown -t html --katex ``` -You must define `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` in each numbered section. +You must define `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` in each section. -To define multiple entries, increment the number in the section (e.g., `[markup.sanitizer.1]` and `[markup.sanitizer.2]`). +To define multiple entries, define different section names (e.g., `[markup.sanitizer.1]` and `[markup.sanitizer.2]`). +These can be numbers, identifying names, or anything else. Once your configuration changes have been made, restart Gitea to have changes take effect. From 5226e1547dedf8e5f40b13a1b9ba4447a6d45721 Mon Sep 17 00:00:00 2001 From: Alexander Scheel Date: Tue, 21 Apr 2020 09:01:44 -0400 Subject: [PATCH 4/6] Sanitizer Rules -- additional documentation Signed-off-by: Alexander Scheel --- custom/conf/app.ini.sample | 4 ++-- docs/content/doc/advanced/config-cheat-sheet.en-us.md | 4 ++-- docs/content/doc/advanced/external-renderers.en-us.md | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/custom/conf/app.ini.sample b/custom/conf/app.ini.sample index d21feeba44143..ffd98d94e9a92 100644 --- a/custom/conf/app.ini.sample +++ b/custom/conf/app.ini.sample @@ -965,8 +965,8 @@ SHOW_FOOTER_TEMPLATE_LOAD_TIME = true [markup.sanitizer.1] ; The following keys can appear once to define a sanitation policy rule. -; This section can appear with an incremenented number to define multiple rules. -; e.g., [markup.sanitizer.1] -> [markup.sanitizer.2] +; This section can appear again with a unique alphanmuric string to define multiple rules. 
+; e.g., [markup.sanitizer.1] -> [markup.sanitizer.2] -> [markup.sanitizer.TeX] ;ELEMENT = span ;ALLOW_ATTR = class ;REGEXP = ^(info|warning|error)$ diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index 7e9a48e0daaa0..1bf41dce53a35 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -658,8 +658,8 @@ REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+ - `ALLOW_ATTR`: The attribute this policy allows. Must be non-empty. - `REGEXP`: A regex to match the contents of the attribute against. Must be present but may be empty for unconditional whitelisting of this attribute. -**Note**: The above section naming policy is new; previously the section was `[markup.sanitizer]` and keys could be redefined. -Now, a unique identifier must appear in the section name (e.g., `[markup.sanitizer.TeX]`) in order to parse multiple rules. +**Note**: The above section naming policy is new to v1.12.0; previously the section was `[markup.sanitizer]` and keys could be redefined in v1.11.0. +Now, a unique identifier must appear in the section name (e.g., `[markup.sanitizer.TeX]`) in order to parse multiple rules and keys cannot be duplicated. This was changed because the implementation with the ini parser used was flawed; the following configs were indistinguishable after parsing: ```ini diff --git a/docs/content/doc/advanced/external-renderers.en-us.md b/docs/content/doc/advanced/external-renderers.en-us.md index 2cbf2943df6f7..be1532a24fb96 100644 --- a/docs/content/doc/advanced/external-renderers.en-us.md +++ b/docs/content/doc/advanced/external-renderers.en-us.md @@ -94,3 +94,4 @@ These can be numbers, identifying names, or anything else. Once your configuration changes have been made, restart Gitea to have changes take effect. 
**Note**: The above section numbering policy is new; previously the section was `[markup.sanitizer]` and keys could be redefined. +For more information, see the corresponding information in the cheat sheet. From 8cceae97c5008174001bd537bc53921c68d20596 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Tue, 28 Apr 2020 19:05:45 +0100 Subject: [PATCH 5/6] Fix documentation as per @guillep2k Signed-off-by: Andrew Thornton --- custom/conf/app.ini.sample | 2 +- .../doc/advanced/config-cheat-sheet.en-us.md | 36 +------------------ .../doc/advanced/external-renderers.en-us.md | 7 ++-- 3 files changed, 5 insertions(+), 40 deletions(-) diff --git a/custom/conf/app.ini.sample b/custom/conf/app.ini.sample index ffd98d94e9a92..377fa4492fea6 100644 --- a/custom/conf/app.ini.sample +++ b/custom/conf/app.ini.sample @@ -965,7 +965,7 @@ SHOW_FOOTER_TEMPLATE_LOAD_TIME = true [markup.sanitizer.1] ; The following keys can appear once to define a sanitation policy rule. -; This section can appear again with a unique alphanmuric string to define multiple rules. +; This section can appear multiple times by adding a unique alphanmuric suffix to define multiple rules. ; e.g., [markup.sanitizer.1] -> [markup.sanitizer.2] -> [markup.sanitizer.TeX] ;ELEMENT = span ;ALLOW_ATTR = class diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index 1bf41dce53a35..c94f6fc21ac8f 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -658,41 +658,7 @@ REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+ - `ALLOW_ATTR`: The attribute this policy allows. Must be non-empty. - `REGEXP`: A regex to match the contents of the attribute against. Must be present but may be empty for unconditional whitelisting of this attribute. 
-**Note**: The above section naming policy is new to v1.12.0; previously the section was `[markup.sanitizer]` and keys could be redefined in v1.11.0. -Now, a unique identifier must appear in the section name (e.g., `[markup.sanitizer.TeX]`) in order to parse multiple rules and keys cannot be duplicated. -This was changed because the implementation with the ini parser used was flawed; the following configs were indistinguishable after parsing: - -```ini -[markup.sanitizer] -ELEMENT = a -ALLOW_ATTR = target -REGEXP = $1 -ELEMENT = a -ALLOW_ATTR = rel -REGEXP = $2 -ELEMENT = img -ALLOW_ATTR = src -REGEXP = $3 -``` - -and - -```ini -[markup.sanitizer] -ELEMENT = a -ALLOW_ATTR = target -REGEXP = $1 -ELEMENT = img -ALLOW_ATTR = rel -REGEXP = $2 -ELEMENT = img -ALLOW_ATTR = src -REGEXP = $3 -``` - -Because of limitations in the ini library, we are unable to automatically migrate configurations. - -We will still parse the first rule from a `[markup.sanitizer]` section if present, but multiple rules must be manually migrated. +Multiple sanitisation rules can be defined by adding unique subsections, e.g. `[markup.sanitizer.TeX-2]`. ## Time (`time`) diff --git a/docs/content/doc/advanced/external-renderers.en-us.md b/docs/content/doc/advanced/external-renderers.en-us.md index be1532a24fb96..39b53018d467e 100644 --- a/docs/content/doc/advanced/external-renderers.en-us.md +++ b/docs/content/doc/advanced/external-renderers.en-us.md @@ -88,10 +88,9 @@ RENDER_COMMAND = pandoc -f markdown -t html --katex You must define `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` in each section. -To define multiple entries, define different section names (e.g., `[markup.sanitizer.1]` and `[markup.sanitizer.2]`). -These can be numbers, identifying names, or anything else. +To define multiple entries, add a unique alphanumeric suffix (e.g., `[markup.sanitizer.1]` and `[markup.sanitizer.something]`). Once your configuration changes have been made, restart Gitea to have changes take effect. 
-**Note**: The above section numbering policy is new; previously the section was `[markup.sanitizer]` and keys could be redefined. -For more information, see the corresponding information in the cheat sheet. +**Note**: Prior to Gitea 1.12 there was a single `markup.sanitizer` section with keys that were redefined for multiple rules, however, +there were significant problems with this method of configuration necessitating configuration through multiple sections. \ No newline at end of file From adce502b0c42d99d4845fad2f007d02a4abe79e4 Mon Sep 17 00:00:00 2001 From: zeripath Date: Wed, 29 Apr 2020 07:05:29 +0100 Subject: [PATCH 6/6] Update custom/conf/app.ini.sample Co-Authored-By: guillep2k <18600385+guillep2k@users.noreply.github.com> --- custom/conf/app.ini.sample | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/custom/conf/app.ini.sample b/custom/conf/app.ini.sample index 6c743e162457a..bad89524ba554 100644 --- a/custom/conf/app.ini.sample +++ b/custom/conf/app.ini.sample @@ -977,7 +977,7 @@ SHOW_FOOTER_TEMPLATE_LOAD_TIME = true [markup.sanitizer.1] ; The following keys can appear once to define a sanitation policy rule. -; This section can appear multiple times by adding a unique alphanmuric suffix to define multiple rules. +; This section can appear multiple times by adding a unique alphanumeric suffix to define multiple rules. ; e.g., [markup.sanitizer.1] -> [markup.sanitizer.2] -> [markup.sanitizer.TeX] ;ELEMENT = span ;ALLOW_ATTR = class