@@ -86,16 +86,20 @@ public function readingTime(): string
8686 }
8787
/**
 * Unicode-aware word count of the text.
 *
 * A word is a run of letters, digits and combining marks (\p{L}, \p{N}, \p{M}).
 * Runs joined by a single apostrophe or hyphen ("don't", "well-known") count as
 * one word, which is how the earlier str_word_count()-based version treated them.
 * Symbols such as emoji belong to none of those classes and are not counted.
 *
 * @return int Number of words; 0 when the text is empty.
 */
public function wordCount(): int
{
    $text = $this->getText();
    if ($text === '') {
        return 0;
    }

    // \x{2019} is the typographic apostrophe; the joiner must sit between two
    // word runs, so leading/trailing quotes and dashes never extend a word.
    $pattern = '/[\p{L}\p{N}\p{M}]+(?:[\'\x{2019}\-][\p{L}\p{N}\p{M}]+)*/u';
    $count = preg_match_all($pattern, $text);

    // preg_match_all() returns false on failure (for example malformed UTF-8
    // under the /u modifier); fall back to the byte-oriented counter rather
    // than reporting a bogus result.
    return $count === false ? str_word_count($text) : $count;
}
100104
101105 /**
@@ -106,44 +110,56 @@ public function wordCount(): int
106110 */
public function charCount(bool $includeSpaces = true): int
{
    // Counts characters in the raw text property. With mbstring available the
    // result is in UTF-8 characters; otherwise it degrades to a byte count.
    $text = $this->text;

    if (!$includeSpaces) {
        // Remove every whitespace character matched by \s, not just spaces.
        $stripped = preg_replace('/\s+/u', '', $text);

        // Under /u preg_replace() yields null for malformed UTF-8. Passing
        // null on to mb_strlen()/strlen() is deprecated since PHP 8.1, so
        // retry byte-wise and, failing that, keep the text untouched.
        $text = $stripped ?? (preg_replace('/\s+/', '', $text) ?? $text);
    }

    return function_exists('mb_strlen')
        ? mb_strlen($text, 'UTF-8')
        : strlen($text);
}
115129
/**
 * Approximate the number of sentences in the text.
 *
 * The text is cut after each run of '.', '!' or '?' that is followed by a
 * whitespace character or the end of the text; fragments that are empty
 * once trimmed are ignored.
 *
 * @return int Count of non-empty fragments, or 0 if the split itself fails.
 */
public function sentenceCount(): int
{
    $fragments = preg_split('/[.!?]+(?:\s|$)/u', $this->getText());
    if ($fragments === false) {
        return 0;
    }

    $total = 0;
    foreach ($fragments as $fragment) {
        // Skip fragments that contain nothing but whitespace.
        if (trim($fragment) !== '') {
            $total++;
        }
    }

    return $total;
}
128144
/**
 * Reverse the text character by character.
 *
 * When mbstring is available the text is split into UTF-8 characters, so
 * multibyte characters are kept intact. The reversal works per character,
 * not per grapheme cluster: a base letter followed by combining marks ends
 * up with the marks in front of it.
 *
 * @return string The reversed text.
 */
public function reverse(): string
{
    $text = $this->text;

    // Split once and reverse the pieces instead of calling mb_substr() for
    // every index, which rescans the string on each iteration.
    if (function_exists('mb_str_split')) {
        return implode('', array_reverse(mb_str_split($text, 1, 'UTF-8')));
    }

    // NOTE(review): Str is not declared in the visible part of this file.
    // Confirm it is imported and that its reverse() works without mbstring.
    return Str::reverse($text);
}
148164
149165 /**
@@ -154,7 +170,7 @@ public function reverse(): string
154170 public function isPalindrome (): bool
155171 {
156172 // keep only alphanumeric chars
157- $ clean = preg_replace ('/[^a-z0-9]/i ' , '' , $ this ->getText ());
173+ $ clean = preg_replace ('/[^a-z0-9]/iu ' , '' , $ this ->getText ());
158174 if ($ clean === null ) return false ;
159175 $ clean = mb_strtolower ($ clean );
160176 // reverse using multibyte-safe method
0 commit comments