ArkScript
A small, fast, functional and scripting language for video games
utf8_char.hpp
Go to the documentation of this file.
1
#ifndef SRC_UTF8_CHAR_HPP
2
#define SRC_UTF8_CHAR_HPP
3
4
#include <array>
5
#include <string>
6
#include <limits>
7
8
#undef max
9
10
namespace Ark::internal
{
    /**
     * @brief A single UTF-8 encoded character: its codepoint, its length in bytes and its raw bytes
     *
     */
    class utf8_char_t
    {
    public:
        using codepoint_t = int;
        using length_t = unsigned char;
        // 5 bytes: up to 4 bytes of UTF-8 data plus room for a terminating zero
        using repr_t = std::array<unsigned char, 5>;

        /**
         * @brief Construct an empty character (codepoint 0, length 0, zeroed representation)
         *
         */
        utf8_char_t() :
            m_codepoint(0), m_length(0), m_repr({ 0 }) {}

        /**
         * @brief Construct a character from already computed parts, stored as given without any validation
         *
         * @param cp the codepoint
         * @param len the number of bytes used by the encoded form
         * @param repr the encoded bytes
         */
        utf8_char_t(const codepoint_t cp, const length_t len, const repr_t repr) :
            m_codepoint(cp), m_length(len), m_repr(repr) {}

        /**
         * @brief Parse a codepoint and compute its length and representation
         * @details https://github.com/sheredom/utf8.h/blob/4e4d828174c35e4564c31a9e35580c299c69a063/utf8.h#L1178
         *
         * The sequence length is chosen from the lead byte only. Continuation bytes are not validated,
         * and any lead byte that does not announce a 2, 3 or 4 byte sequence is read as a 1 byte codepoint.
         *
         * @param it iterator in a string
         * @param end end iterator, used to avoid going out of bound
         * @return std::pair<std::string::iterator, utf8_char_t> the iterator points to the beginning of the next codepoint, the utf8_char_t represents the parsed codepoint.
         *         If the range is empty or the sequence is cut short by end, the pair is (end, empty utf8_char_t)
         */
        static std::pair<std::string::iterator, utf8_char_t> at(const std::string::iterator it, const std::string::iterator end)
        {
            // work with a distance instead of building `it + n`, which may not go past `end`
            const auto remaining = end - it;
            if (remaining <= 0)
                return std::make_pair(end, utf8_char_t {});

            // read a byte as an unsigned value, whatever the signedness of char is
            const auto byte_at = [it](const std::string::difference_type offset) {
                return static_cast<codepoint_t>(static_cast<unsigned char>(*(it + offset)));
            };

            const codepoint_t lead = byte_at(0);
            codepoint_t cp;
            length_t length;

            if (0xf0 == (0xf8 & lead))  // 4 byte utf8 codepoint
            {
                length = 4;
                cp = 0x07 & lead;
            }
            else if (0xe0 == (0xf0 & lead))  // 3 byte utf8 codepoint
            {
                length = 3;
                cp = 0x0f & lead;
            }
            else if (0xc0 == (0xe0 & lead))  // 2 byte utf8 codepoint
            {
                length = 2;
                cp = 0x1f & lead;
            }
            else  // 1 byte utf8 codepoint otherwise
            {
                length = 1;
                cp = lead;
            }

            // truncated sequence: not enough bytes left before end
            if (remaining < length)
                return std::make_pair(end, utf8_char_t {});

            repr_t repr = {};
            repr[0] = static_cast<unsigned char>(lead);
            // each continuation byte brings its 6 low bits
            for (length_t i = 1; i < length; ++i)
            {
                const codepoint_t continuation = byte_at(i);
                cp = (cp << 6) | (0x3f & continuation);
                repr[i] = static_cast<unsigned char>(continuation);
            }

            return std::make_pair(
                it + static_cast<std::string::difference_type>(length),
                utf8_char_t(cp, length, repr));
        }

        /**
         * @brief Check if the codepoint can be printed
         *
         * @return true if the given codepoint is printable according to std::isprint when it is in the ASCII range (0 to 0x7f).
         *         Codepoints above that range are always reported printable, negative ones never
         */
        [[nodiscard]] bool isPrintable() const
        {
            constexpr codepoint_t ascii_max = 0x7f;

            // std::isprint must not be given a negative value
            if (m_codepoint < 0)
                return false;
            // inclusive bound, so that DEL (0x7f) is checked by std::isprint too
            if (m_codepoint <= ascii_max)
                return std::isprint(m_codepoint) != 0;
            return true;
        }

        /**
         * @return const char* pointer to the first byte of the stored representation.
         *         It is zero terminated for characters built by at() or by the default constructor
         */
        [[nodiscard]] const char* c_str() const { return reinterpret_cast<const char*>(m_repr.data()); }

        /**
         * @return std::size_t the stored length, in bytes
         */
        [[nodiscard]] std::size_t size() const { return m_length; }

        /**
         * @return codepoint_t the stored codepoint
         */
        [[nodiscard]] codepoint_t codepoint() const { return m_codepoint; }

    private:
        codepoint_t m_codepoint;
        length_t m_length;
        repr_t m_repr;
    };
}
102
103
#endif
Ark::internal::utf8_char_t
Definition
utf8_char.hpp:13
Ark::internal::utf8_char_t::utf8_char_t
utf8_char_t()
Definition
utf8_char.hpp:19
Ark::internal::utf8_char_t::utf8_char_t
utf8_char_t(const codepoint_t cp, const length_t len, const repr_t repr)
Definition
utf8_char.hpp:22
Ark::internal::utf8_char_t::codepoint_t
int codepoint_t
Definition
utf8_char.hpp:15
Ark::internal::utf8_char_t::m_codepoint
codepoint_t m_codepoint
Definition
utf8_char.hpp:97
Ark::internal::utf8_char_t::m_length
length_t m_length
Definition
utf8_char.hpp:98
Ark::internal::utf8_char_t::codepoint
codepoint_t codepoint() const
Definition
utf8_char.hpp:94
Ark::internal::utf8_char_t::repr_t
std::array< unsigned char, 5 > repr_t
Definition
utf8_char.hpp:17
Ark::internal::utf8_char_t::c_str
const char * c_str() const
Definition
utf8_char.hpp:92
Ark::internal::utf8_char_t::m_repr
repr_t m_repr
Definition
utf8_char.hpp:99
Ark::internal::utf8_char_t::isPrintable
bool isPrintable() const
Definition
utf8_char.hpp:85
Ark::internal::utf8_char_t::at
static std::pair< std::string::iterator, utf8_char_t > at(const std::string::iterator it, const std::string::iterator end)
Parse a codepoint and compute its length and representation.
Definition
utf8_char.hpp:32
Ark::internal::utf8_char_t::length_t
unsigned char length_t
Definition
utf8_char.hpp:16
Ark::internal::utf8_char_t::size
std::size_t size() const
Definition
utf8_char.hpp:93
Ark::internal
Definition
Builtins.hpp:26
include
Ark
Compiler
AST
utf8_char.hpp
Generated on Wed Jan 15 2025 19:51:44 for ArkScript by
1.12.0