Photon Common Library 2.0.0-beta
A physically based renderer.
Loading...
Searching...
No Matches
string_utils.h
Go to the documentation of this file.
1#pragma once
2
8#include "Common/assertion.h"
9#include "Common/exceptions.h"
10
11#include <cstddef>
12#include <string>
13#include <algorithm>
14#include <string_view>
15#include <stdexcept>
16#include <charconv>
17#include <limits>
18#include <climits>
19#include <type_traits>
20#include <format>
21#include <concepts>
22#include <array>
23
25{
26
27template<typename ObjType>
28concept CHasToString = requires (const ObjType& obj)
29{
30 { obj.toString() } -> std::convertible_to<std::string_view>;
31};
32
33}// end namespace ph::string_utils
34
/*! @brief Emits the body of a `std::formatter` specialization that formats via `toString()`.
The generated struct derives from `std::formatter<std::string>`, so format specifications
are parsed by the base class and the object is formatted as the string returned by its
`toString()`. The macro does not emit a `template<...>` header; the macro user supplies it.
The variadic parameter is the formatted type (variadic so that commas in template argument
lists are accepted).
*/
#define PH_DEFINE_INLINE_TO_STRING_FORMATTER_SPECIALIZATION(...)\
    struct std::formatter<__VA_ARGS__> : std::formatter<std::string>\
    {\
        /* Reject unsuitable types early with a readable message */\
        static_assert(::ph::string_utils::CHasToString<__VA_ARGS__>,\
            "type " #__VA_ARGS__ " must have a const method toString() and the result should be "\
            "implicitly convertible to std::string"); \
        \
        /* `parse()` comes from `std::formatter<std::string>` */\
        \
        /* Forward the `toString()` result to the string formatter */\
        auto format(const __VA_ARGS__& obj, std::format_context& formatCtx) const\
        {\
            using StringFormatter = std::formatter<std::string>;\
            return StringFormatter::format(obj.toString(), formatCtx);\
        }\
    }
51
/* Defines a `std::formatter` for the given type: expands to `template<>` followed by
   `PH_DEFINE_INLINE_TO_STRING_FORMATTER_SPECIALIZATION`, i.e., an explicit (full)
   specialization whose `format()` uses the type's `toString()`.
   NOTE(review): presumably intended to be invoked at global scope and terminated with a
   semicolon by the user -- confirm against existing call sites. */
64#define PH_DEFINE_INLINE_TO_STRING_FORMATTER(...)\
65 template<>\
66 PH_DEFINE_INLINE_TO_STRING_FORMATTER_SPECIALIZATION(__VA_ARGS__)
67
/* Same expansion as `PH_DEFINE_INLINE_TO_STRING_FORMATTER_SPECIALIZATION`, with no
   `template<>` header emitted.
   NOTE(review): presumably the caller writes its own `template<...>` header right before
   this macro to form a partial specialization -- confirm against existing call sites. */
82#define PH_DEFINE_INLINE_TO_STRING_FORMATTER_TEMPLATE(...)\
83 PH_DEFINE_INLINE_TO_STRING_FORMATTER_SPECIALIZATION(__VA_ARGS__)
84
85namespace ph::string_utils
86{
87
/*! @brief Selects which set of characters is regarded as whitespace.
*/
enum class EWhitespace
{
    /*! Commonly used whitespace characters. */
    Common,

    /*! Standard whitespace characters. Referenced by `get_whitespaces()`. */
    Standard
};
96
97template<EWhitespace TYPE = EWhitespace::Common>
98inline std::string_view get_whitespaces()
99{
100 if constexpr(TYPE == EWhitespace::Common)
101 {
103 }
104 else if constexpr(TYPE == EWhitespace::Standard)
105 {
107 }
108 else
109 {
110 static_assert(TYPE == EWhitespace::Common || TYPE == EWhitespace::Standard,
111 "Must include a case for each enum entry; did you forget to add one?");
112
113 return "";
114 }
115}
116
117template<EWhitespace TYPE = EWhitespace::Common>
118inline constexpr bool is_whitespace(const char ch)
119{
120 return get_whitespaces<TYPE>().find(ch) != std::string_view::npos;
121}
122
/*! @brief Check whether a string contains at least one of the candidate characters.
@param srcStr The string to inspect.
@param candidates The characters to look for.
@return `true` if any character of `candidates` occurs in `srcStr`.
*/
inline bool has_any_of(const std::string_view srcStr, const std::string_view candidates)
{
    return srcStr.find_first_of(candidates) != std::string_view::npos;
}
128
/*! @brief Check whether a string contains none of the candidate characters.
@param srcStr The string to inspect.
@param candidates The characters that must be absent.
@return `true` if no character of `candidates` occurs in `srcStr`.
*/
inline bool has_none_of(const std::string_view srcStr, const std::string_view candidates)
{
    // Exact negation of `has_any_of()`
    return srcStr.find_first_of(candidates) == std::string_view::npos;
}
133
/*! @brief Remove characters from the beginning.
Leading characters are dropped for as long as they are one of `candidates`.
@param srcStr The string to cut.
@param candidates The characters to remove.
@return A view into `srcStr` starting at the first character that is not a candidate;
an empty view (positioned at the end of `srcStr`) if every character is a candidate.
*/
inline std::string_view cut_head(const std::string_view srcStr, const std::string_view candidates)
{
    const auto firstKeptPos = srcStr.find_first_not_of(candidates);
    if(firstKeptPos == std::string_view::npos)
    {
        // Nothing survives the cut (this also covers an empty input)
        return srcStr.substr(srcStr.size());
    }

    return srcStr.substr(firstKeptPos);
}
156
/*! @brief Remove characters from the end.
Trailing characters are dropped for as long as they are one of `candidates`.
@param srcStr The string to cut.
@param candidates The characters to remove.
@return A view into `srcStr` ending at the last character that is not a candidate;
an empty view (positioned at the start of `srcStr`) if every character is a candidate.
*/
inline std::string_view cut_tail(const std::string_view srcStr, const std::string_view candidates)
{
    const auto lastKeptPos = srcStr.find_last_not_of(candidates);
    if(lastKeptPos == std::string_view::npos)
    {
        // Nothing survives the cut (this also covers an empty input)
        return srcStr.substr(0, 0);
    }

    // Keep everything up to and including the last non-candidate character
    return srcStr.substr(0, lastKeptPos + 1);
}
180
190inline std::string_view cut_ends(const std::string_view srcStr, const std::string_view candidates)
191{
192 return cut_head(cut_tail(srcStr, candidates), candidates);
193}
194
200template<EWhitespace TYPE = EWhitespace::Common>
201inline std::string_view trim_head(const std::string_view srcStr)
202{
203 return cut_head(srcStr, get_whitespaces<TYPE>());
204}
205
211template<EWhitespace TYPE = EWhitespace::Common>
212inline std::string_view trim_tail(const std::string_view srcStr)
213{
214 return cut_tail(srcStr, get_whitespaces<TYPE>());
215}
216
222template<EWhitespace TYPE = EWhitespace::Common>
223inline std::string_view trim(const std::string_view srcStr)
224{
225 return trim_head<TYPE>(trim_tail<TYPE>(srcStr));
226}
227
235inline std::string_view next_token(
236 std::string_view srcStr,
237 std::string_view* const out_remainingStr = nullptr,
238 const std::string_view tokenSeparators = get_whitespaces<>())
239{
240 srcStr = cut_head(srcStr, tokenSeparators);
241
242 const auto separatorPos = srcStr.find_first_of(tokenSeparators);
243 if(separatorPos != std::string_view::npos)
244 {
245 const auto nextToken = srcStr.substr(0, separatorPos);
246 if(out_remainingStr)
247 {
248 // `separatorPos + 1` as we do not want to include the separator
249 *out_remainingStr = srcStr.substr(separatorPos + 1);
250 }
251
252 return nextToken;
253 }
254 else
255 {
256 return srcStr;
257 }
258}
259
265inline char az_to_AZ(const char ch)
266{
267 static_assert(std::numeric_limits<unsigned char>::max() == table::ASCII_TO_UPPER.size() - 1);
268
269 const auto mappedCharIdx = static_cast<unsigned char>(ch);
270 return static_cast<char>(table::ASCII_TO_UPPER[mappedCharIdx]);
271}
272
278inline char AZ_to_az(const char ch)
279{
280 static_assert(std::numeric_limits<unsigned char>::max() == table::ASCII_TO_LOWER.size() - 1);
281
282 const auto mappedCharIdx = static_cast<unsigned char>(ch);
283 return static_cast<char>(table::ASCII_TO_LOWER[mappedCharIdx]);
284}
285
293inline void az_to_AZ(std::string& str)
294{
295 for(char& ch : str)
296 {
297 ch = az_to_AZ(ch);
298 }
299}
300
308inline void AZ_to_az(std::string& str)
309{
310 for(char& ch : str)
311 {
312 ch = AZ_to_az(ch);
313 }
314}
315
/*! @brief Repeat the input string N times.
@param str The string to repeat.
@param n Number of repetitions.
@return `str` concatenated `n` times; empty if `str` is empty or `n` is 0.
*/
inline std::string repeat(const std::string_view str, const std::size_t n)
{
    std::string repeated;

    // Reserving 0 characters is fine, so empty results need no special case
    repeated.reserve(str.size() * n);

    for(std::size_t remaining = n; remaining > 0; --remaining)
    {
        repeated.append(str);
    }

    return repeated;
}
332
/*! @brief Remove all occurrences of a character in the string.
@param str The string to modify in place.
@param ch The character to remove.
*/
inline void erase_all(std::string& str, const char ch)
{
    // Shift the kept characters to the front, then drop the leftover tail
    const auto keptEnd = std::remove(str.begin(), str.end(), ch);
    str.erase(keptEnd, str.end());
}
339
// Helpers shared by the `<charconv>`-based parsing and stringifying functions of this file.
340namespace detail_from_to_char
341{
342
// Translates a `std::errc` into an exception.
// Returns normally when `errorCode` equals a value-initialized `std::errc` (no error);
// throws otherwise: `OverflowException` for `result_out_of_range`, and `RuntimeException`
// (with the numeric error value appended to the message) for any code not listed.
// NOTE(review): in this listing the `throw <Exception>(` heads of the `invalid_argument`
// and `value_too_large` cases are not visible (only their message arguments are); the
// exception types thrown there must be confirmed against the original header.
343inline void throw_from_std_errc_if_has_error(const std::errc errorCode)
344{
345 // According to several sources, 0, or zero-initialized std::errc,
346 // indicates no error.
347 //
348 // [1] see the example for std::from_chars
349 // https://en.cppreference.com/w/cpp/utility/from_chars
350 // [2] https://stackoverflow.com/a/63567008
351 //
352 constexpr std::errc NO_ERROR_VALUE = std::errc();
353
354 switch(errorCode)
355 {
356 case NO_ERROR_VALUE:
357 return;
358
// NOTE(review): the statement head for this case is missing from the listing
359 case std::errc::invalid_argument:
361 "input cannot be interpreted as a numeric value");
362
363 case std::errc::result_out_of_range:
364 throw OverflowException(
365 "result will overflow the arithmetic type");
366
// NOTE(review): the statement head for this case is missing from the listing
367 case std::errc::value_too_large:
369 "result cannot fit in the output buffer");
370
// Any other error code is reported together with its underlying integer value
371 default:
372 throw RuntimeException(
373 "unknown error: std::errc = " + std::to_string(
374 static_cast<std::underlying_type_t<std::errc>>(errorCode)));
375 }
376}
377
378}// end namespace detail_from_to_char
379
383template<typename T>
384inline T parse_float(const std::string_view floatStr)
385{
386 static_assert(std::is_floating_point_v<T>,
387 "parse_float() accepts only floating point type.");
388
389 // `std::from_chars()` do not ignore leading whitespaces, we need to do it manually
390 const std::string_view floatStrNoLeadingWS = trim_head(floatStr);
391
392 T value;
393 const std::from_chars_result result = std::from_chars(
394 floatStrNoLeadingWS.data(),
395 floatStrNoLeadingWS.data() + floatStrNoLeadingWS.size(),
396 value);
397
399
400 return value;
401}
402
408template<typename T>
409inline T parse_int(std::string_view intStr)
410{
411 // TODO: option to handle base prefix (e.g., 0x)
412
413 static_assert(std::is_integral_v<T>,
414 "parse_int() accepts only integer type.");
415
416 // `std::from_chars()` do not ignore leading whitespaces, we need to do it manually
417 intStr = trim_head(intStr);
418
419 int base = 10;
420 if(intStr.starts_with("0x"))
421 {
422 base = 16;
423
424 // Remove "0x" as `std::from_chars()` do not recognize base prefix
425 intStr.remove_prefix(2);
426 }
427
428 // `std::from_chars()` does not support `bool` so we treat it as unsigned char
429 using IntType = std::conditional_t<std::is_same_v<T, bool>, unsigned char, T>;
430
431 std::remove_const_t<IntType> intValue;
432 const std::from_chars_result result = std::from_chars(
433 intStr.data(),
434 intStr.data() + intStr.size(),
435 intValue,
436 base);
437
439
440 return static_cast<T>(intValue);
441}
442
446template<typename NumberType>
447inline NumberType parse_number(const std::string_view numberStr)
448{
449 if constexpr(std::is_floating_point_v<NumberType>)
450 {
451 return parse_float<NumberType>(numberStr);
452 }
453 else
454 {
455 static_assert(std::is_integral_v<NumberType>);
456
457 return parse_int<NumberType>(numberStr);
458 }
459}
460
474template<typename T>
475inline std::size_t stringify_float(const T value, char* const out_buffer, const std::size_t bufferSize)
476{
477 // TODO: option to handle base prefix (e.g., 0x)
478 // TODO: option to handle precision
479
480 static_assert(std::is_floating_point_v<T>,
481 "stringify_float() accepts only floating point type.");
482
483 PH_ASSERT(out_buffer);
484 PH_ASSERT_GE(bufferSize, 1);
485
486 const std::to_chars_result result = std::to_chars(
487 out_buffer,
488 out_buffer + bufferSize,
489 value);
490
492
493 // Must written at least a char, and must not exceed bufferSize
494 PH_ASSERT(out_buffer < result.ptr && result.ptr <= out_buffer + bufferSize);
495 return static_cast<std::size_t>(result.ptr - out_buffer);
496}
497
// Converts an integer to a string in base [2, 62], using `table::BASE62_DIGITS` as digits.
// A '-' is written first for negative values of signed types. No null terminator is written.
// value: the integer to convert (`bool` is treated as `unsigned char`).
// out_buffer: destination; asserted to be non-null.
// bufferSize: size of `out_buffer`; asserted to be at least 1.
// base: numeric base; asserted to be within [2, 62].
// Returns the number of characters written (sign included).
// NOTE(review): `intValue = -intValue` is not representable when `value` is the most
// negative value of a signed `T` -- that input needs handling (e.g., unsigned magnitude).
508template<std::integral T>
509inline std::size_t stringify_int_alphabetic(
510 const T value,
511 char* const out_buffer,
512 const std::size_t bufferSize,
513 const int base)
514{
515 PH_ASSERT(out_buffer);
516 PH_ASSERT_GE(bufferSize, 1);
517 PH_ASSERT_IN_RANGE_INCLUSIVE(base, 2, 62);
518
519 // Treat `bool` as unsigned char (for arithmetics)
520 using IntType = std::conditional_t<std::is_same_v<T, bool>, unsigned char, T>;
521 auto intValue = static_cast<std::remove_const_t<IntType>>(value);
522
523 std::size_t numCharsWritten = 0;
524
525 // Write sign
// (the sign goes straight into `out_buffer`, before the size check further below)
526 if constexpr(std::is_signed_v<T>)
527 {
528 if(intValue < 0)
529 {
530 out_buffer[0] = '-';
531 ++numCharsWritten;
532
533 intValue = -intValue;
534 }
535 }
536
537 // Use a temporary buffer, enough to hold base 2 output
538 std::array<unsigned char, sizeof(IntType) * CHAR_BIT> tmpBuffer;
539 auto tmpBufferEnd = tmpBuffer.end();
540
// Digits are produced least significant first, so the temporary buffer is filled
// backwards from its end; a value of 0 still yields one digit.
541 PH_ASSERT_GE(intValue, 0);
542 do
543 {
544 *(--tmpBufferEnd) = table::BASE62_DIGITS[intValue % base];
545 intValue /= base;
546 } while(intValue > 0);
547
// After the loop `tmpBufferEnd` points at the most significant digit
548 auto numDigits = tmpBuffer.end() - tmpBufferEnd;
549 if(numCharsWritten + numDigits > bufferSize)
550 {
// NOTE(review): the head of this statement is missing from the listing; presumably
// an exception is thrown here (via `throw_formatted`) -- confirm its type
552 "result cannot fit in the output buffer: need={}, given={}",
553 numCharsWritten + numDigits, bufferSize);
554 }
555 else
556 {
557 std::copy(tmpBufferEnd, tmpBuffer.end(), out_buffer + numCharsWritten);
558 numCharsWritten += numDigits;
559 }
560
561 return numCharsWritten;
562}
563
574template<std::integral T>
575inline std::size_t stringify_int(
576 const T value,
577 char* const out_buffer,
578 const std::size_t bufferSize,
579 const int base = 10)
580{
581 PH_ASSERT_IN_RANGE_INCLUSIVE(base, 2, 62);
582
583 // Base in [2, 36] is supported by STL via `to_chars()`
584 if(2 <= base && base <= 36)
585 {
586 PH_ASSERT(out_buffer);
587 PH_ASSERT_GE(bufferSize, 1);
588
589 // `std::to_chars()` does not support `bool` so we treat it as unsigned char
590 using IntType = std::conditional_t<std::is_same_v<T, bool>, unsigned char, T>;
591 const auto intValue = static_cast<IntType>(value);
592
593 std::to_chars_result result = std::to_chars(
594 out_buffer,
595 out_buffer + bufferSize,
596 intValue,
597 base);
598
600
601 // Must written at least a char, and must not exceed bufferSize
602 PH_ASSERT(out_buffer < result.ptr && result.ptr <= out_buffer + bufferSize);
603 return static_cast<std::size_t>(result.ptr - out_buffer);
604 }
605 else
606 {
607 return stringify_int_alphabetic(value, out_buffer, bufferSize, base);
608 }
609}
610
616template<typename NumberType>
617inline std::size_t stringify_number(
618 const NumberType value,
619 char* const out_buffer,
620 const std::size_t bufferSize)
621{
622 if constexpr(std::is_floating_point_v<NumberType>)
623 {
624 return stringify_float<NumberType>(value, out_buffer, bufferSize);
625 }
626 else
627 {
628 static_assert(std::is_integral_v<NumberType>);
629
630 return stringify_int<NumberType>(value, out_buffer, bufferSize);
631 }
632}
633
641template<typename NumberType>
642inline std::string& stringify_number(
643 const NumberType value,
644 std::string& out_str,
645 const std::size_t maxChars = 64)
646{
647 const auto originalSize = out_str.size();
648 out_str.resize(originalSize + maxChars);
649
650 const std::size_t newSize = string_utils::stringify_number<NumberType>(
651 value, out_str.data() + originalSize, maxChars);
652
653 out_str.resize(originalSize + newSize);
654 return out_str;
655}
656
/*! @brief Converts a number to a newly created string.
@tparam NumberType A floating point or integral type.
@param value The value to convert.
@param maxChars Maximum number of characters the converted number may occupy.
@return The string representation of `value`.
*/
template<typename NumberType>
inline std::string stringify_number(
    const NumberType value,
    const std::size_t maxChars = 64)
{
    std::string numberStr;

    // Append to the (empty) result through the overload taking a string
    stringify_number(value, numberStr, maxChars);

    return numberStr;
}
672
673}// end namespace ph::string_utils
#define PH_ASSERT_GE(a, b)
Definition assertion.h:67
#define PH_ASSERT_IN_RANGE_INCLUSIVE(value, lowerBound, upperBound)
Similar to PH_ASSERT_IN_RANGE(3), except the bounds are inclusive.
Definition assertion.h:80
#define PH_ASSERT(condition)
Definition assertion.h:49
Definition exceptions.h:67
Definition exceptions.h:73
Definition exceptions.h:49
General exception thrown on runtime error.
Definition exceptions.h:21
Definition string_utils.h:28
void throw_from_std_errc_if_has_error(const std::errc errorCode)
Definition string_utils.h:343
constexpr std::array< unsigned char, 256 > ASCII_TO_UPPER
Definition string_utils_table.h:26
constexpr std::array< unsigned char, 256 > ASCII_TO_LOWER
Definition string_utils_table.h:67
constexpr std::array< unsigned char, 62 > BASE62_DIGITS
Definition string_utils_table.h:103
constexpr std::string_view common_whitespaces
Commonly used whitespace characters.
Definition string_utils_table.h:13
constexpr std::string_view standard_whitespaces
Standard whitespace characters.
Definition string_utils_table.h:19
Contains various string manipulation helpers.
Definition string_utils.h:25
std::string_view trim_head(const std::string_view srcStr)
Remove white spaces from the beginning.
Definition string_utils.h:201
std::size_t stringify_int(const T value, char *const out_buffer, const std::size_t bufferSize, const int base=10)
Converts an integer to string.
Definition string_utils.h:575
std::string_view trim_tail(const std::string_view srcStr)
Remove white spaces from the end.
Definition string_utils.h:212
char az_to_AZ(const char ch)
Convert lower-case characters to upper-case.
Definition string_utils.h:265
bool has_any_of(const std::string_view srcStr, const std::string_view candidates)
Definition string_utils.h:123
std::string_view trim(const std::string_view srcStr)
Remove white spaces from both ends.
Definition string_utils.h:223
T parse_float(const std::string_view floatStr)
Returns a float by processing its string representation. Supports float, double, and long double.
Definition string_utils.h:384
void erase_all(std::string &str, const char ch)
Remove all occurrences of a character in the string.
Definition string_utils.h:335
std::string_view cut_head(const std::string_view srcStr, const std::string_view candidates)
Remove characters from the beginning.
Definition string_utils.h:143
std::string_view cut_tail(const std::string_view srcStr, const std::string_view candidates)
Remove characters from the end.
Definition string_utils.h:166
constexpr bool is_whitespace(const char ch)
Definition string_utils.h:118
T parse_int(std::string_view intStr)
Returns an integer by processing its string representation. Supports the following:
Definition string_utils.h:409
std::string repeat(const std::string_view str, const std::size_t n)
Repeat the input string N times.
Definition string_utils.h:318
EWhitespace
Definition string_utils.h:89
char AZ_to_az(const char ch)
Convert upper-case characters to lower-case.
Definition string_utils.h:278
std::string_view cut_ends(const std::string_view srcStr, const std::string_view candidates)
Remove characters from both ends.
Definition string_utils.h:190
std::string_view get_whitespaces()
Definition string_utils.h:98
std::size_t stringify_int_alphabetic(const T value, char *const out_buffer, const std::size_t bufferSize, const int base)
Converts an integer to base [2, 62] string.
Definition string_utils.h:509
bool has_none_of(const std::string_view srcStr, const std::string_view candidates)
Definition string_utils.h:129
std::string_view next_token(std::string_view srcStr, std::string_view *const out_remainingStr=nullptr, const std::string_view tokenSeparators=get_whitespaces<>())
Retrieve a token from a string.
Definition string_utils.h:235
NumberType parse_number(const std::string_view numberStr)
Returns a number by processing its string representation. Accepts all types supported by parse_float(...
Definition string_utils.h:447
std::size_t stringify_float(const T value, char *const out_buffer, const std::size_t bufferSize)
Converts a float to string.
Definition string_utils.h:475
std::size_t stringify_number(const NumberType value, char *const out_buffer, const std::size_t bufferSize)
Converts a number to string. Accepts all types supported by stringify_float(T, char*,...
Definition string_utils.h:617
void throw_formatted(const std::format_string< Args... > msgFormat, Args &&... args)
Definition exceptions.h:85