88
99class Purify
1010{
11- private static $ purifierHtml ;
12- private static $ purifierDev ;
1311 private static $ purifierString ;
14-
12+
1513 /**
16- * Init CMS-safe purifier
14+ * Init plain string purifier
1715 */
18- private static function initHtml ()
16+ private static function initString ()
1917 {
20- if (!self ::$ purifierHtml ) {
18+ if (!self ::$ purifierString ) {
2119 $ config = HTMLPurifier_Config::createDefault ();
2220
23- // Core settings
24- $ config ->set ('HTML.SafeIframe ' , true );
25- $ config ->set ('AutoFormat.Linkify ' , true );
26- $ config ->set ('AutoFormat.AutoParagraph ' , true );
27- $ config ->set ('Core.EscapeInvalidTags ' , false );
28-
29- // Unique definition ID/revision
30- $ config ->set ('HTML.DefinitionID ' , 'cms-html5-purifier ' );
31- $ config ->set ('HTML.DefinitionRev ' , 2 );
32-
33- // Base allowed tags/attributes (only those HTMLPurifier natively supports)
34- $ config ->set ('HTML.Allowed ' , implode (', ' , [
35- 'a[href|title|target] ' ,
36- 'abbr[title] ' , 'acronym[title] ' ,
37- 'b ' , 'strong ' , 'i ' , 'em ' , 'u ' , 'strike ' ,
38- 'sub ' , 'sup ' ,
39- 'p ' , 'br ' , 'hr ' ,
40- 'h1 ' ,'h2 ' ,'h3 ' ,'h4 ' ,'h5 ' ,'h6 ' ,
41- 'blockquote[cite] ' ,
42- 'code ' , 'pre ' ,
43- 'ul ' ,'ol ' ,'li ' ,'dl ' ,'dt ' ,'dd ' ,
44- 'table ' ,'thead ' ,'tbody ' ,'tfoot ' ,'tr ' ,'th ' ,'td ' ,
45- 'img[src|alt|title|width|height] ' ,
46- 'div[style|class|id] ' ,
47- 'span[style|class|id] ' ,
48- ]));
49-
50- // Extend HTML5 support
51- if ($ def = $ config ->maybeGetRawHTMLDefinition ()) {
52- // Semantic HTML5
53- $ def ->addElement ('mark ' , 'Inline ' , 'Inline ' , 'Common ' );
54- $ def ->addElement ('header ' , 'Block ' , 'Flow ' , 'Common ' );
55- $ def ->addElement ('footer ' , 'Block ' , 'Flow ' , 'Common ' );
56- $ def ->addElement ('main ' , 'Block ' , 'Flow ' , 'Common ' );
57- $ def ->addElement ('section ' , 'Block ' , 'Flow ' , 'Common ' );
58- $ def ->addElement ('article ' , 'Block ' , 'Flow ' , 'Common ' );
59- $ def ->addElement ('aside ' , 'Block ' , 'Flow ' , 'Common ' );
60- $ def ->addElement ('figure ' , 'Block ' , 'Optional: (figcaption, Flow) ' , 'Common ' );
61- $ def ->addElement ('figcaption ' , 'Inline ' , 'Flow ' , 'Common ' );
62- $ def ->addElement ('nav ' , 'Block ' , 'Flow ' , 'Common ' );
63-
64- // Multimedia
65- $ def ->addElement ('audio ' , 'Block ' , 'Optional: Flow ' , 'Common ' , [
66- 'src ' => 'URI ' ,
67- 'controls ' => 'Bool ' ,
68- 'width ' => 'Length ' ,
69- 'height ' => 'Length ' ,
70- 'preload ' => 'Enum#auto,metadata,none '
71- ]);
72- $ def ->addElement ('video ' , 'Block ' , 'Optional: Flow ' , 'Common ' , [
73- 'src ' => 'URI ' ,
74- 'controls ' => 'Bool ' ,
75- 'width ' => 'Length ' ,
76- 'height ' => 'Length ' ,
77- 'poster ' => 'URI ' ,
78- 'preload ' => 'Enum#auto,metadata,none '
79- ]);
80- $ def ->addElement ('source ' , 'Block ' , 'Flow ' , 'Common ' , [
81- 'src ' => 'URI ' ,
82- 'type ' => 'Text '
83- ]);
84-
85- // iframe with extended attributes
86- $ def ->addElement ('iframe ' , 'Block ' , 'Flow ' , 'Common ' , [
87- 'src ' => 'URI ' ,
88- 'width ' => 'Length ' ,
89- 'height ' => 'Length ' ,
90- 'frameborder ' => 'Text ' ,
91- 'allow ' => 'Text ' ,
92- 'allowfullscreen ' => 'Bool '
93- ]);
94- }
95-
96- self ::$ purifierHtml = new \HTMLPurifier ($ config );
21+ // Strip all HTML safely including scripts
22+ $ config ->set ('HTML.Allowed ' , '' );
23+ $ config ->set ('HTML.Trusted ' , false );
24+
25+ self ::$ purifierString = new HTMLPurifier ($ config );
9726 }
9827 }
9928
10029 /**
101- * Init Dev-safe purifier (allow all tags, attributes, JS, style)
30+ * Build a new HTMLPurifier instance: default config plus base settings, with $settings applied as overrides
31+ *
32+ * @param array $settings
33+ * @return \HTMLPurifier
10234 */
103- private static function initDev ( )
35+ protected static function purifier ( $ settings = [] )
10436 {
105- if (!self ::$ purifierDev ) {
106- $ config = HTMLPurifier_Config::createDefault ();
107-
108- // Allow all HTML, including script/style for dev usage
109- $ config ->set ('HTML.Allowed ' , null );
110- $ config ->set ('HTML.SafeEmbed ' , true );
111- $ config ->set ('HTML.SafeObject ' , true );
112- $ config ->set ('HTML.SafeIframe ' , true );
113- $ config ->set ('CSS.AllowTricky ' , true );
114- $ config ->set ('HTML.Trusted ' , true ); // keep script/style for dev
37+ $ config = \HTMLPurifier_Config::createDefault ();
38+
39+ // Preserve formatting as-is
40+ $ config ->set ('Core.NormalizeNewlines ' , false );
41+ $ config ->set ('HTML.Trusted ' , true );
42+ $ config ->set ('Attr.EnableID ' , true );
43+ $ config ->set ('CSS.AllowTricky ' , true );
44+ $ config ->set ('Attr.AllowedFrameTargets ' , ['_blank ' ,'_self ' ,'_parent ' ,'_top ' ]);
45+ $ config ->set ('HTML.AllowedAttributes ' , null );
46+
47+ // Required when extending HTML5 support
48+ $ config ->set ('HTML.DefinitionID ' , 'custom-html5-definitions ' );
49+ $ config ->set ('HTML.DefinitionRev ' , 1 ); // bump this if you change definitions
50+
51+ // Merge custom overrides
52+ foreach ($ settings as $ key => $ val ) {
53+ $ config ->set ($ key , $ val );
54+ }
11555
116- self ::$ purifierDev = new HTMLPurifier ($ config );
56+ // ---- Extend HTML5 tags support ----
57+ if ($ def = $ config ->maybeGetRawHTMLDefinition ()) {
58+ // Structural / semantic tags
59+ $ def ->addElement ('section ' , 'Block ' , 'Flow ' , 'Common ' );
60+ $ def ->addElement ('article ' , 'Block ' , 'Flow ' , 'Common ' );
61+ $ def ->addElement ('aside ' , 'Block ' , 'Flow ' , 'Common ' );
62+ $ def ->addElement ('header ' , 'Block ' , 'Flow ' , 'Common ' );
63+ $ def ->addElement ('footer ' , 'Block ' , 'Flow ' , 'Common ' );
64+ $ def ->addElement ('main ' , 'Block ' , 'Flow ' , 'Common ' );
65+ $ def ->addElement ('figure ' , 'Block ' , 'Flow ' , 'Common ' );
66+ $ def ->addElement ('figcaption ' , 'Inline ' , 'Flow ' , 'Common ' );
67+
68+ // Media tags
69+ $ def ->addElement ('video ' , 'Block ' , 'Flow ' , 'Common ' , [
70+ 'src ' => 'URI ' ,
71+ 'type ' => 'Text ' ,
72+ 'width ' => 'Length ' ,
73+ 'height ' => 'Length ' ,
74+ 'poster ' => 'URI ' ,
75+ 'preload ' => 'Enum#auto,metadata,none ' ,
76+ 'controls ' => 'Bool ' ,
77+ 'autoplay ' => 'Bool ' ,
78+ 'loop ' => 'Bool ' ,
79+ 'muted ' => 'Bool ' ,
80+ ]);
81+
82+ $ def ->addElement ('audio ' , 'Block ' , 'Flow ' , 'Common ' , [
83+ 'src ' => 'URI ' ,
84+ 'preload ' => 'Enum#auto,metadata,none ' ,
85+ 'controls ' => 'Bool ' ,
86+ 'autoplay ' => 'Bool ' ,
87+ 'loop ' => 'Bool ' ,
88+ 'muted ' => 'Bool ' ,
89+ ]);
90+
91+ $ def ->addElement ('source ' , 'Block ' , 'Empty ' , 'Common ' , [
92+ 'src ' => 'URI ' ,
93+ 'type ' => 'Text ' ,
94+ ]);
95+
96+ // Time tag
97+ $ def ->addElement ('time ' , 'Inline ' , 'Inline ' , 'Common ' , [
98+ 'datetime ' => 'Text ' ,
99+ ]);
117100 }
101+
102+ return new HTMLPurifier ($ config );
118103 }
119104
120105 /**
121- * Init plain string purifier
106+ * Purify HTML for CMS/blog posts
122107 */
123- private static function initString ()
108+ public static function html ( string $ content ): string
124109 {
125- if (!self ::$ purifierString ) {
126- $ config = HTMLPurifier_Config::createDefault ();
127-
128- // Strip all HTML safely including scripts
129- $ config ->set ('HTML.Allowed ' , '' );
130- $ config ->set ('HTML.Trusted ' , false );
131-
132- self ::$ purifierString = new HTMLPurifier ($ config );
133- }
110+ // Allow almost everything for CMS (iframe, video, embeds)
111+ $ settings = [
112+ 'HTML.SafeIframe ' => true ,
113+ 'URI.SafeIframeRegexp ' => '%^(https?:)?//% ' , // allow external iframes
114+ 'HTML.SafeObject ' => true ,
115+ 'Output.FlashCompat ' => true ,
116+ ];
117+ return self ::purifier ($ settings )->purify ($ content );
134118 }
135119
136120 /**
137- * Purify HTML for CMS/blog posts
121+ * Purify for developer usage (keep all content including JS/style)
138122 */
139- public static function html (string $ content ): string
123+ public static function dev (string $ content ): string
140124 {
141- self ::initHtml ();
142- return self ::$ purifierHtml ->purify ($ content );
125+ // Allow code/pre/dev tags but still sanitize dangerous stuff
126+ $ settings = [
127+ 'HTML.SafeIframe ' => true ,
128+ 'URI.SafeIframeRegexp ' => '%^(https?:)?//% ' ,
129+ 'HTML.SafeObject ' => true ,
130+ 'Output.FlashCompat ' => true ,
131+ 'HTML.AllowedElements ' => null , // don't restrict, allow code-related tags too
132+ ];
133+
134+ return self ::purifier ($ settings )->purify ($ content );
143135 }
144136
145137 /**
@@ -153,11 +145,11 @@ public static function string(string $content): string
153145 }
154146
155147 /**
156- * Purify for developer usage (keep all content including JS/style)
148+ * Return the content unchanged: no purification is applied (unsafe for untrusted input)
157149 */
158- public static function dev (string $ content ): string
150+ public static function raw (string $ content ): string
159151 {
160- self ::initDev ();
161- return self ::$ purifierDev ->purify ($ content );
152+ return $ content ;
162153 }
163- }
154+
155+ }
0 commit comments