basic/utf8: modernize utf8_is_valid_n a bit

author Mike Yuan <me@yhndnzj.com>

Thu, 9 May 2024 13:55:09 +0000 (21:55 +0800)

committer Mike Yuan <me@yhndnzj.com>

Sun, 16 Jun 2024 17:07:34 +0000 (19:07 +0200)
author Mike Yuan <me@yhndnzj.com>
Thu, 9 May 2024 13:55:09 +0000 (21:55 +0800)
committer Mike Yuan <me@yhndnzj.com>
Sun, 16 Jun 2024 17:07:34 +0000 (19:07 +0200)
diff --git a/src/basic/utf8.c b/src/basic/utf8.c

index 15deef1ffd0b0c8fcd769fef9a361482f89719bc..fd1e96c68ce914b88b0eb805a4847aa6c9b503c1 100644 (file)
--- a/src/basic/utf8.c
+++ b/src/basic/utf8.c
@@ -130,24 +130,24 @@ bool utf8_is_printable_newline(const char* str, size_t length, bool allow_newlin
          return true;
  }
  
-char *utf8_is_valid_n(const char *str, size_t len_bytes) {
+char* utf8_is_valid_n(const char *str, size_t len_bytes) {
          /* Check if the string is composed of valid utf8 characters. If length len_bytes is given, stop after
           * len_bytes. Otherwise, stop at NUL. */
  
          assert(str);
  
-        for (const char *p = str; len_bytes != SIZE_MAX ? (size_t) (p - str) < len_bytes : *p != '\0'; ) {
+        for (size_t i = 0; len_bytes != SIZE_MAX ? i < len_bytes : str[i] != '\0'; ) {
                  int len;
  
-                if (_unlikely_(*p == '\0') && len_bytes != SIZE_MAX)
+                if (_unlikely_(str[i] == '\0'))
                          return NULL; /* embedded NUL */
  
-                len = utf8_encoded_valid_unichar(p,
-                                                 len_bytes != SIZE_MAX ? len_bytes - (p - str) : SIZE_MAX);
+                len = utf8_encoded_valid_unichar(str + i,
+                                                 len_bytes != SIZE_MAX ? len_bytes - i : SIZE_MAX);
                  if (_unlikely_(len < 0))
                          return NULL; /* invalid character */
  
-                p += len;
+                i += len;
          }
  
          return (char*) str;
diff --git a/src/basic/utf8.h b/src/basic/utf8.h

index 962312c5fb92704e76c64a2b0a8d576d3c46d105..fbd3318987b0548816bccb15c90eaccee09bc2ed 100644 (file)
--- a/src/basic/utf8.h
+++ b/src/basic/utf8.h
@@ -14,9 +14,9 @@
  
  bool unichar_is_valid(char32_t c);
  
-char *utf8_is_valid_n(const char *str, size_t len_bytes) _pure_;
-static inline char *utf8_is_valid(const char *s) {
-        return utf8_is_valid_n(s, SIZE_MAX);
+char* utf8_is_valid_n(const char *str, size_t len_bytes) _pure_;
+static inline char* utf8_is_valid(const char *str) {
+        return utf8_is_valid_n(str, SIZE_MAX);
  }
  char *ascii_is_valid(const char *s) _pure_;
  char *ascii_is_valid_n(const char *str, size_t len);
author	Mike Yuan <me@yhndnzj.com>
	Thu, 9 May 2024 13:55:09 +0000 (21:55 +0800)
committer	Mike Yuan <me@yhndnzj.com>
	Sun, 16 Jun 2024 17:07:34 +0000 (19:07 +0200)
src/basic/utf8.c		patch | blob | history
src/basic/utf8.h		patch | blob | history