Skip to content

Commit 6c235ca

Browse files
Utility and Str update
1 parent f1c58df commit 6c235ca

File tree

3 files changed

+146
-14
lines changed

3 files changed

+146
-14
lines changed

src/Purify.php

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,104 @@ protected static function purifier($settings = [])
101101

102102
return new HTMLPurifier($config);
103103
}
104+
105+
/**
 * Replace structural HTML tags with newline characters.
 *
 * - `<br>` in any form (`<br>`, `<br/>`, `<br />`, `<br class="x">`) becomes
 *   a single newline.
 * - Closing `</p>`, `</div>` and `</h1>`..`</h6>` tags become a blank line.
 * - When $collapse is true, every run of whitespace (including the newlines
 *   inserted above) is then squeezed into a single space, so block
 *   boundaries survive only as word separators.
 *
 * Opening tags and all other markup are left untouched.
 *
 * @param string $content
 * @param bool $collapse
 * @return string
 */
protected static function preserveNewLine($content, $collapse = false)
{
    $original = (string) $content;

    // preg_replace() returns null on a PCRE error; never let that escape
    // from a helper documented to return a string.
    $text = preg_replace('/<\s*br\b[^>]*>/i', "\n", $original) ?? $original;
    $text = preg_replace('/<\/(?:p|div|h[1-6])\s*>/i', "\n\n", $text) ?? $text;

    // Collapse whitespace
    if ($collapse) {
        // The /u pattern fails (null) on malformed UTF-8; retry byte-wise
        // before giving up and keeping the text as it is.
        $text = preg_replace('/\s+/u', ' ', $text)
            ?? preg_replace('/\s+/', ' ', $text)
            ?? $text;
    }

    return $text;
}
126+
127+
/**
 * Decode a URL taken from an HTML attribute into its plain form.
 *
 * The attribute value is HTML-escaped on the outside and percent-encoded
 * on the inside, so the layers are removed in that order: HTML entities
 * first, then %xx sequences. Decoding the other way round would turn a
 * percent-encoded "&amp;" (%26amp%3B) into a bare "&".
 *
 * @param string $url
 * @return string
 */
protected static function cleanUrlLink($url)
{
    // ?? instead of ?: so that a literal "0" is kept rather than emptied.
    $url = (string) ($url ?? '');

    $url = html_entity_decode($url, ENT_QUOTES | ENT_HTML5, 'UTF-8');

    return rawurldecode($url);
}
140+
141+
/**
 * Convert HTML content to a readable plain-text string.
 *
 * Steps:
 *  1. Line breaks and block-closing tags are turned into whitespace and all
 *     whitespace is collapsed to single spaces.
 *  2. Elements carrying a link-like attribute (href, src, data-src, poster)
 *     are replaced by a bracketed placeholder:
 *       - <a>    : "[inner text]", or "[link]" when there is no text
 *       - <img>  : "[alt text]",   or "[image]" when there is no alt
 *       - others : "[url]",        or "[tag]" when the URL decodes to ''
 *     When $allowUrl is false these elements are removed entirely,
 *     including any inner text.
 *  3. Remaining tags are stripped and HTML entities are decoded once.
 *
 * @param string $content
 * @param bool $allowUrl
 * @return string
 */
public static function readable(string $content, bool $allowUrl = true): string
{
    $text = self::preserveNewLine($content, true);

    // Capture groups: 1 = tag name, 2 = value of the link-like attribute,
    // 3 = inner HTML (only set when a matching closing tag follows).
    $pattern = '/<(a|img|iframe|video|audio|source|embed|track|script)[^>]+?(?:href|src|data-src|poster)=["\']([^"\']+)["\'][^>]*>(?:([\s\S]*?)<\/\1>)?/i';

    $replaced = preg_replace_callback(
        $pattern,
        static function (array $matches) use ($allowUrl): string {
            if (!$allowUrl) {
                return '';
            }

            $tag = strtolower($matches[1]);

            switch ($tag) {
                case 'a':
                    // Prefer the visible link text, otherwise a generic marker
                    $label = trim(strip_tags($matches[3] ?? ''));

                    return $label !== '' ? "[$label]" : '[link]';

                case 'img':
                    // The alt attribute is not part of the main pattern,
                    // so look it up inside the matched tag itself.
                    $alt = '';
                    if (preg_match('/\salt\s*=\s*(["\'])(.*?)\1/is', $matches[0], $attr) === 1) {
                        $alt = trim($attr[2]);
                    }

                    return $alt !== '' ? "[$alt]" : '[image]';

                default:
                    // iframe, video, audio, source, embed, track, script:
                    // prefer the URL, fall back to the tag name
                    $url = self::cleanUrlLink($matches[2]);

                    return $url !== '' ? "[$url]" : "[$tag]";
            }
        },
        $text
    );

    // null means the regex engine failed; keep the unreplaced text so the
    // remaining steps still produce tag-free output.
    $text = $replaced ?? $text;

    // Remove all other HTML tags
    $text = strip_tags($text);

    // Decode HTML entities exactly once: a second pass would turn an
    // intentionally escaped "&amp;lt;" into "<".
    return trim(html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8'));
}
104202

105203
/**
106204
* Purify HTML for CMS/blog posts

src/Str.php

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
namespace Tamedevelopers\Support;
66

7+
use Tamedevelopers\Support\Purify;
78
use Tamedevelopers\Support\Traits\StrTrait;
89

910
class Str
@@ -212,6 +213,7 @@ public static function sortMultipleArray(&$array = [], $key = null, $type = 'asc
212213

213214
/**
214215
* Alias for changeKeysFromArray() method
216+
* - Rename keys of an Array
215217
*
216218
* @param array $array
217219
* @param array|string $fromKey
@@ -225,6 +227,7 @@ public static function renameArrayKeys($array, $fromKey, $toKey = null)
225227

226228
/**
227229
* Alias for removeKeysFromArray() method.
230+
* - Remove keys from an Array
228231
*
229232
* @param array $array
230233
* @param string|array $keys
@@ -237,6 +240,7 @@ public static function forgetArrayKeys($array, ...$keys)
237240

238241
/**
239242
* Alias for convertArrayKey() method.
243+
* - Convert array keys to specified key if available, else return the original array.
240244
*
241245
* @param array $array The input data array.
242246
* @param string $key The key to use for conversion.
@@ -342,6 +346,7 @@ public static function bindings(array $bindings)
342346

343347
/**
344348
* Alias for flattenValue() method.
349+
* - Flatten a multidimensional array into a single-dimensional array.
345350
*
346351
* @param array $array The multidimensional array to flatten.
347352
* @return array The flattened array.
@@ -544,6 +549,18 @@ public static function sanitize($string = null)
544549
return Tame::filter_input($string);
545550
}
546551

552+
/**
 * Turn HTML content into a readable string.
 *
 * Thin proxy: both arguments are handed to Purify::readable() unchanged
 * and its result is returned as-is.
 *
 * @param string $content
 * @param bool $allowUrl
 * @return string
 */
public static function readable(string $content, bool $allowUrl = true)
{
    $plain = Purify::readable($content, $allowUrl);

    return $plain;
}
563+
547564
/**
548565
* Format number to nearest thousand
549566
*
@@ -1058,6 +1075,7 @@ public static function generateRandomWords(int $wordCount, int $minLength = 3, i
10581075

10591076
/**
10601077
* Alias for (getFileExtension) method
1078+
* - Get the file extension from a filename or path.
10611079
*
10621080
* @param string $filename
10631081
* @return string|null

src/Traits/TextUtilityTrait.php

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -86,16 +86,20 @@ public function readingTime(): string
8686
}
8787

8888
/**
 * Unicode-safe word count of the text.
 *
 * A word is a run of letters, digits and combining marks from any script.
 * An apostrophe or hyphen sitting between two such runs keeps them joined,
 * so "don't" and "well-known" each count as one word (as str_word_count()
 * did for ASCII text). Punctuation and emoji on their own are not counted.
 *
 * @return int
 */
public function wordCount(): int
{
    $text = $this->getText();
    if ($text === '') {
        return 0;
    }

    $count = preg_match_all(
        '/[\p{L}\p{N}\p{M}]+(?:[\'’\-][\p{L}\p{N}\p{M}]+)*/u',
        $text
    );

    // false signals a PCRE failure (typically malformed UTF-8 with /u);
    // fall back to the byte-wise counter instead of reading a missing match list.
    if ($count === false) {
        return str_word_count($text);
    }

    return $count;
}
100104

101105
/**
@@ -106,44 +110,56 @@ public function wordCount(): int
106110
*/
107111
public function charCount(bool $includeSpaces = true): int
{
    $text = (string) $this->text;

    if (!$includeSpaces) {
        // Remove all whitespace chars, not just spaces. The /u pattern
        // returns null on malformed UTF-8, so retry byte-wise and, failing
        // that, count the text as it is rather than passing null on.
        $text = preg_replace('/\s+/u', '', $text)
            ?? preg_replace('/\s+/', '', $text)
            ?? $text;
    }

    // Count characters when mbstring is available, bytes otherwise.
    return function_exists('mb_strlen')
        ? mb_strlen($text, 'UTF-8')
        : strlen($text);
}
115129

116130
/**
 * Approximate the number of sentences in the text.
 *
 * The text is split wherever a run of ".", "!" or "?" is followed by
 * whitespace or the end of the string; pieces that are empty after
 * trimming are ignored.
 *
 * @return int
 */
public function sentenceCount(): int
{
    $chunks = preg_split('/[.!?]+(?:\s|$)/u', $this->getText());
    if ($chunks === false) {
        return 0;
    }

    $total = 0;
    foreach ($chunks as $chunk) {
        if (trim($chunk) !== '') {
            $total++;
        }
    }

    return $total;
}
128144

129145
/**
 * Reverse the text string.
 *
 * When mbstring is available the text is reversed character by character
 * (multibyte-safe); otherwise the work is delegated to Str::reverse().
 * Characters are reversed individually, so a base letter followed by a
 * combining mark will end up with the mark in front of it.
 *
 * @return string
 */
public function reverse(): string
{
    $text = $this->text;

    // Split once and reverse the list: linear in the text length, unlike
    // repeated mb_substr() calls, which rescan the string for every offset.
    if (function_exists('mb_str_split')) {
        return implode('', array_reverse(mb_str_split($text)));
    }

    return Str::reverse($text);
}

149165
/**
@@ -154,7 +170,7 @@ public function reverse(): string
154170
public function isPalindrome(): bool
155171
{
156172
// keep only alphanumeric chars
157-
$clean = preg_replace('/[^a-z0-9]/i', '', $this->getText());
173+
$clean = preg_replace('/[^a-z0-9]/iu', '', $this->getText());
158174
if ($clean === null) return false;
159175
$clean = mb_strtolower($clean);
160176
// reverse using multibyte-safe method

0 commit comments

Comments
 (0)