// Utf<8>::decode - reads one UTF-8 encoded character starting at `begin` and
// accumulates its code point into `output`.
// NOTE(review): the leading numbers are the original file's line numbers and they
// skip (39 -> 42 -> 44 ...), so braces and some statements are absent from this
// listing. `replacement` and the return statement do not appear in the visible
// lines; presumably they belong to an omitted else-branch / tail - confirm
// against the full file.
38 template <
typename In>
39 In Utf<8>::decode(In begin, In end, Uint32& output, Uint32 replacement)
// Number of bytes that follow each possible lead byte:
// 0x00-0xBF -> 0, 0xC0-0xDF -> 1, 0xE0-0xEF -> 2,
// 0xF0-0xF7 -> 3, 0xF8-0xFB -> 4, 0xFC-0xFF -> 5.
42 static const int trailing[256] =
44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
46 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
51 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
// Per-length constants subtracted after accumulation. Each one equals the sum
// of the shifted marker bits of the lead and continuation bytes, e.g.
// 0x3080 == (0xC0 << 6) + 0x80 for a two-byte sequence.
53 static const Uint32 offsets[6] =
55 0x00000000, 0x00003080, 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080
// Look up how many bytes follow the lead byte (cast to Uint8 so the value is a
// valid 0..255 table index).
59 int trailingBytes = trailing[
static_cast<Uint8
>(*begin)];
// Decode only when the whole sequence lies before `end`; this expression needs
// an iterator type that supports `+ int` and `<`.
60 if (begin + trailingBytes < end)
// No `break` appears on the case lines: entering at case N runs down to case 0,
// so N + 1 bytes are added, with a 6-bit left shift between consecutive bytes.
// NOTE(review): accumulation uses `+=`, so `output` is presumably reset to 0 on
// the omitted line 62 - confirm.
63 switch (trailingBytes)
65 case 5 : output +=
static_cast<Uint8
>(*begin++); output <<= 6;
66 case 4 : output +=
static_cast<Uint8
>(*begin++); output <<= 6;
67 case 3 : output +=
static_cast<Uint8
>(*begin++); output <<= 6;
68 case 2 : output +=
static_cast<Uint8
>(*begin++); output <<= 6;
69 case 1 : output +=
static_cast<Uint8
>(*begin++); output <<= 6;
70 case 0 : output +=
static_cast<Uint8
>(*begin++);
// Remove the marker bits that were summed in together with the payload bits.
72 output -= offsets[trailingBytes];
86 template <
typename Out>
87 Out Utf<8>::encode(Uint32 input, Out output, Uint8 replacement)
90 static const Uint8 firstBytes[7] =
92 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
96 if ((input > 0x0010FFFF) || ((input >= 0xD800) && (input <= 0xDBFF)))
100 *output++ = replacement;
107 std::size_t bytestoWrite = 1;
108 if (input < 0x80) bytestoWrite = 1;
109 else if (input < 0x800) bytestoWrite = 2;
110 else if (input < 0x10000) bytestoWrite = 3;
111 else if (input <= 0x0010FFFF) bytestoWrite = 4;
115 switch (bytestoWrite)
117 case 4 : bytes[3] =
static_cast<Uint8
>((input | 0x80) & 0xBF); input >>= 6;
118 case 3 : bytes[2] =
static_cast<Uint8
>((input | 0x80) & 0xBF); input >>= 6;
119 case 2 : bytes[1] =
static_cast<Uint8
>((input | 0x80) & 0xBF); input >>= 6;
120 case 1 : bytes[0] =
static_cast<Uint8
> (input | firstBytes[bytestoWrite]);
124 output = std::copy(bytes, bytes + bytestoWrite, output);
// Utf<8>::next - advances past one UTF-8 character by delegating to decode()
// and returning the iterator it yields.
// decode() is called with three arguments, so its fourth parameter must take a
// default from the declaration (not shown here).
// `codepoint` is presumably a local Uint32 declared on an omitted line.
132 template <
typename In>
133 In Utf<8>::next(In begin, In end)
136 return decode(begin, end, codepoint);
// Utf<8>::count - starts a counter at 0 and walks the range one character at a
// time with next().
// NOTE(review): the loop header, the increment of `length` and the return are
// on lines omitted from this listing - confirm against the full file.
141 template <
typename In>
142 std::size_t Utf<8>::count(In begin, In end)
144 std::size_t length = 0;
147 begin = next(begin, end);
// Utf<8>::fromAnsi - converts an input element to a code point with
// Utf<32>::decodeAnsi (using `locale`), then appends its UTF-8 encoding to
// `output` via encode().
// The enclosing loop and the return are on omitted lines (presumably a loop
// over [begin, end) - confirm).
156 template <
typename In,
typename Out>
157 Out Utf<8>::fromAnsi(In begin, In end, Out output,
const std::locale& locale)
161 Uint32 codepoint = Utf<32>::decodeAnsi(*begin++, locale);
162 output = encode(codepoint, output);
// Utf<8>::fromWide - converts an input element to a code point with
// Utf<32>::decodeWide, then appends its UTF-8 encoding to `output` via encode().
// The enclosing loop and the return are on omitted lines.
170 template <
typename In,
typename Out>
171 Out Utf<8>::fromWide(In begin, In end, Out output)
175 Uint32 codepoint = Utf<32>::decodeWide(*begin++);
176 output = encode(codepoint, output);
// Utf<8>::fromLatin1 - passes each input element unchanged to encode() as the
// code point and appends the resulting UTF-8 bytes to `output`.
// The enclosing loop and the return are on omitted lines.
// NOTE(review): `*begin++` is converted implicitly to encode()'s Uint32
// parameter; if `In` yields a signed char, values >= 0x80 would sign-extend to
// a huge value - confirm callers supply unsigned bytes.
184 template <
typename In,
typename Out>
185 Out Utf<8>::fromLatin1(In begin, In end, Out output)
190 output = encode(*begin++, output);
// Utf<8>::toAnsi - decodes a UTF-8 character with decode() and hands the code
// point to Utf<32>::encodeAnsi together with `replacement` and `locale`.
// The declaration of `codepoint`, the enclosing loop and the return are on
// omitted lines.
197 template <
typename In,
typename Out>
198 Out Utf<8>::toAnsi(In begin, In end, Out output,
char replacement,
const std::locale& locale)
203 begin = decode(begin, end, codepoint);
204 output = Utf<32>::encodeAnsi(codepoint, output, replacement, locale);
// Utf<8>::toWide - decodes a UTF-8 character with decode() and hands the code
// point to Utf<32>::encodeWide together with `replacement`.
// The declaration of `codepoint`, the enclosing loop and the return are on
// omitted lines.
212 template <
typename In,
typename Out>
213 Out Utf<8>::toWide(In begin, In end, Out output,
wchar_t replacement)
218 begin = decode(begin, end, codepoint);
219 output = Utf<32>::encodeWide(codepoint, output, replacement);
// Utf<8>::toLatin1 - decodes a UTF-8 character with decode(); code points
// below 256 are written as a single char, anything else is written as
// `replacement`.
// The declaration of `codepoint`, the enclosing loop and the return are on
// omitted lines.
227 template <
typename In,
typename Out>
228 Out Utf<8>::toLatin1(In begin, In end, Out output,
char replacement)
235 begin = decode(begin, end, codepoint);
236 *output++ = codepoint < 256 ? static_cast<char>(codepoint) : replacement;
// Utf<8>::toUtf8 - source and target encodings are the same, so the range is
// copied element by element with std::copy; no decoding or validation is done.
244 template <
typename In,
typename Out>
245 Out Utf<8>::toUtf8(In begin, In end, Out output)
247 return std::copy(begin, end, output);
// Utf<8>::toUtf16 - decodes a UTF-8 character with decode() and re-encodes the
// code point with Utf<16>::encode.
// The declaration of `codepoint`, the enclosing loop and the return are on
// omitted lines.
252 template <
typename In,
typename Out>
253 Out Utf<8>::toUtf16(In begin, In end, Out output)
258 begin = decode(begin, end, codepoint);
259 output = Utf<16>::encode(codepoint, output);
// Utf<8>::toUtf32 - decodes a UTF-8 character with decode() and writes the
// code point directly to `output`.
// The declaration of `codepoint`, the enclosing loop and the return are on
// omitted lines.
267 template <
typename In,
typename Out>
268 Out Utf<8>::toUtf32(In begin, In end, Out output)
273 begin = decode(begin, end, codepoint);
274 *output++ = codepoint;
// Utf<16>::decode - reads one UTF-16 character starting at `begin` and stores
// its code point (or `replacement`) in `output`.
// NOTE(review): braces, else-branches, any bounds check before the second read
// and the return are on lines omitted from this listing - confirm against the
// full file.
282 template <
typename In>
283 In Utf<16>::decode(In begin, In end, Uint32& output, Uint32 replacement)
// Consume the first 16-bit unit.
285 Uint16 first = *begin++;
// 0xD800..0xDBFF is the high (leading) surrogate range: a second unit is needed.
288 if ((first >= 0xD800) && (first <= 0xDBFF))
292 Uint32 second = *begin++;
// The second unit must be a low (trailing) surrogate, 0xDC00..0xDFFF.
293 if ((second >= 0xDC00) && (second <= 0xDFFF))
// Combine the two 10-bit payloads and add 0x10000 to obtain the code point.
296 output =
static_cast<Uint32
>(((first - 0xD800) << 10) + (second - 0xDC00) + 0x0010000);
// Two separate failure paths store `replacement`; which conditions lead to
// each is determined by the omitted else lines.
301 output = replacement;
308 output = replacement;
// Utf<16>::encode - writes one code point to `output` as one or two UTF-16
// units, or writes `replacement` for input it rejects.
// NOTE(review): the outer condition selecting the single-unit path, the
// braces/else lines and the return are omitted from this listing.
322 template <
typename Out>
323 Out Utf<16>::encode(Uint32 input, Out output, Uint16 replacement)
// The surrogate block 0xD800..0xDFFF is rejected.
328 if ((input >= 0xD800) && (input <= 0xDFFF))
332 *output++ = replacement;
// Otherwise the value is stored as a single 16-bit unit.
337 *output++ =
static_cast<Uint16
>(input);
// Values above 0x10FFFF are rejected as well.
340 else if (input > 0x0010FFFF)
344 *output++ = replacement;
// Surrogate pair: upper 10 bits go into the 0xD800-based unit, lower 10 bits
// into the 0xDC00-based unit.
// NOTE(review): this arithmetic is only correct if 0x10000 has been subtracted
// from `input` beforehand - presumably on the omitted line 349; confirm.
350 *output++ =
static_cast<Uint16
>((input >> 10) + 0xD800);
351 *output++ =
static_cast<Uint16
>((input & 0x3FFUL) + 0xDC00);
// Utf<16>::next - advances past one UTF-16 character by delegating to decode()
// and returning the iterator it yields.
// decode() is called with three arguments, so its fourth parameter must take a
// default from the declaration (not shown here).
// `codepoint` is presumably a local Uint32 declared on an omitted line.
359 template <
typename In>
360 In Utf<16>::next(In begin, In end)
363 return decode(begin, end, codepoint);
// Utf<16>::count - starts a counter at 0 and walks the range one character at
// a time with next().
// NOTE(review): the loop header, the increment of `length` and the return are
// on lines omitted from this listing - confirm against the full file.
368 template <
typename In>
369 std::size_t Utf<16>::count(In begin, In end)
371 std::size_t length = 0;
374 begin = next(begin, end);
// Utf<16>::fromAnsi - converts an input element to a code point with
// Utf<32>::decodeAnsi (using `locale`), then appends its UTF-16 encoding to
// `output` via encode().
// The enclosing loop and the return are on omitted lines.
383 template <
typename In,
typename Out>
384 Out Utf<16>::fromAnsi(In begin, In end, Out output,
const std::locale& locale)
388 Uint32 codepoint = Utf<32>::decodeAnsi(*begin++, locale);
389 output = encode(codepoint, output);
// Utf<16>::fromWide - converts an input element to a code point with
// Utf<32>::decodeWide, then appends its UTF-16 encoding to `output` via
// encode().
// The enclosing loop and the return are on omitted lines.
397 template <
typename In,
typename Out>
398 Out Utf<16>::fromWide(In begin, In end, Out output)
402 Uint32 codepoint = Utf<32>::decodeWide(*begin++);
403 output = encode(codepoint, output);
// Utf<16>::fromLatin1 - copies the input elements one-to-one into `output`
// with std::copy; each element is converted implicitly to the output element
// type and no encode() call is involved.
// NOTE(review): if `In` yields a signed char, values >= 0x80 would sign-extend
// during that implicit conversion - confirm callers supply unsigned bytes.
411 template <
typename In,
typename Out>
412 Out Utf<16>::fromLatin1(In begin, In end, Out output)
416 return std::copy(begin, end, output);
// Utf<16>::toAnsi - decodes a UTF-16 character with decode() and hands the
// code point to Utf<32>::encodeAnsi together with `replacement` and `locale`.
// The declaration of `codepoint`, the enclosing loop and the return are on
// omitted lines.
421 template <
typename In,
typename Out>
422 Out Utf<16>::toAnsi(In begin, In end, Out output,
char replacement,
const std::locale& locale)
427 begin = decode(begin, end, codepoint);
428 output = Utf<32>::encodeAnsi(codepoint, output, replacement, locale);
// Utf<16>::toWide - decodes a UTF-16 character with decode() and hands the
// code point to Utf<32>::encodeWide together with `replacement`.
// The declaration of `codepoint`, the enclosing loop and the return are on
// omitted lines.
436 template <
typename In,
typename Out>
437 Out Utf<16>::toWide(In begin, In end, Out output,
wchar_t replacement)
442 begin = decode(begin, end, codepoint);
443 output = Utf<32>::encodeWide(codepoint, output, replacement);
// Utf<16>::toLatin1 - inspects the raw 16-bit unit at `begin` (decode() is not
// called on the visible line): units below 256 are written as a single char,
// anything else is written as `replacement`.
// The enclosing loop, the advance of `begin` and the return are on omitted
// lines.
451 template <
typename In,
typename Out>
452 Out Utf<16>::toLatin1(In begin, In end, Out output,
char replacement)
458 *output++ = *begin < 256 ? static_cast<char>(*begin) : replacement;
// Utf<16>::toUtf8 - decodes a UTF-16 character with decode() and re-encodes
// the code point with Utf<8>::encode.
// The declaration of `codepoint`, the enclosing loop and the return are on
// omitted lines.
467 template <
typename In,
typename Out>
468 Out Utf<16>::toUtf8(In begin, In end, Out output)
473 begin = decode(begin, end, codepoint);
474 output = Utf<8>::encode(codepoint, output);
// Utf<16>::toUtf16 - source and target encodings are the same, so the range is
// copied element by element with std::copy; no decoding or validation is done.
482 template <
typename In,
typename Out>
483 Out Utf<16>::toUtf16(In begin, In end, Out output)
485 return std::copy(begin, end, output);
// Utf<16>::toUtf32 - decodes a UTF-16 character with decode() and writes the
// code point directly to `output`.
// The declaration of `codepoint`, the enclosing loop and the return are on
// omitted lines.
490 template <
typename In,
typename Out>
491 Out Utf<16>::toUtf32(In begin, In end, Out output)
496 begin = decode(begin, end, codepoint);
497 *output++ = codepoint;
// Utf<32>::decode - only the signature survives in this listing; the body
// lines are omitted.
// The end iterator and the replacement value are unnamed parameters, so the
// body cannot refer to them.
505 template <
typename In>
506 In Utf<32>::decode(In begin, In , Uint32& output, Uint32 )
// Utf<32>::encode - only the signature survives in this listing; the body
// lines are omitted.
// The replacement value is an unnamed parameter, so the body cannot refer to it.
514 template <
typename Out>
515 Out Utf<32>::encode(Uint32 input, Out output, Uint32 )
// Utf<32>::next - only the signature survives in this listing; the body lines
// are omitted.
// The end iterator is an unnamed parameter, so the body cannot refer to it.
523 template <
typename In>
524 In Utf<32>::next(In begin, In )
// Utf<32>::count - only the signature survives in this listing; the body lines
// are omitted, so how the std::size_t result is derived from [begin, end)
// cannot be stated from here.
531 template <
typename In>
532 std::size_t Utf<32>::count(In begin, In end)
// Utf<32>::fromAnsi - converts an input element with decodeAnsi (using
// `locale`) and writes the resulting code point directly to `output`.
// The enclosing loop and the return are on omitted lines.
539 template <
typename In,
typename Out>
540 Out Utf<32>::fromAnsi(In begin, In end, Out output,
const std::locale& locale)
543 *output++ = decodeAnsi(*begin++, locale);
// Utf<32>::fromWide - converts an input element with decodeWide and writes the
// resulting code point directly to `output`.
// The enclosing loop and the return are on omitted lines.
550 template <
typename In,
typename Out>
551 Out Utf<32>::fromWide(In begin, In end, Out output)
554 *output++ = decodeWide(*begin++);
// Utf<32>::fromLatin1 - copies the input elements one-to-one into `output`
// with std::copy; each element is converted implicitly to the output element
// type.
// NOTE(review): if `In` yields a signed char, values >= 0x80 would sign-extend
// during that implicit conversion - confirm callers supply unsigned bytes.
561 template <
typename In,
typename Out>
562 Out Utf<32>::fromLatin1(In begin, In end, Out output)
566 return std::copy(begin, end, output);
// Utf<32>::toAnsi - passes an input code point to encodeAnsi together with
// `replacement` and `locale`; no decode step is needed on the visible line.
// The enclosing loop and the return are on omitted lines.
571 template <
typename In,
typename Out>
572 Out Utf<32>::toAnsi(In begin, In end, Out output,
char replacement,
const std::locale& locale)
575 output = encodeAnsi(*begin++, output, replacement, locale);
// Utf<32>::toWide - passes an input code point to encodeWide together with
// `replacement`.
// The enclosing loop and the return are on omitted lines.
582 template <
typename In,
typename Out>
583 Out Utf<32>::toWide(In begin, In end, Out output,
wchar_t replacement)
586 output = encodeWide(*begin++, output, replacement);
// Utf<32>::toLatin1 - values below 256 are written as a single char, anything
// else is written as `replacement`.
// The enclosing loop, the advance of `begin` and the return are on omitted
// lines.
593 template <
typename In,
typename Out>
594 Out Utf<32>::toLatin1(In begin, In end, Out output,
char replacement)
600 *output++ = *begin < 256 ? static_cast<char>(*begin) : replacement;
// Utf<32>::toUtf8 - passes an input code point to Utf<8>::encode, which
// appends its UTF-8 bytes to `output`.
// The enclosing loop and the return are on omitted lines.
609 template <
typename In,
typename Out>
610 Out Utf<32>::toUtf8(In begin, In end, Out output)
613 output = Utf<8>::encode(*begin++, output);
// Utf<32>::toUtf16 - passes an input code point to Utf<16>::encode, which
// appends its UTF-16 units to `output`.
// The enclosing loop and the return are on omitted lines.
619 template <
typename In,
typename Out>
620 Out Utf<32>::toUtf16(In begin, In end, Out output)
623 output = Utf<16>::encode(*begin++, output);
// Utf<32>::toUtf32 - source and target encodings are the same, so the range is
// copied element by element with std::copy; no validation is done.
630 template <
typename In,
typename Out>
631 Out Utf<32>::toUtf32(In begin, In end, Out output)
633 return std::copy(begin, end, output);
// Utf<32>::decodeAnsi - converts one narrow character to a Uint32 code point.
// Two implementations are selected at preprocessing time; the `#else` and
// `#endif` lines separating them are omitted from this listing.
638 template <
typename In>
639 Uint32 Utf<32>::decodeAnsi(In input,
const std::locale& locale)
// First branch is taken when SFML_SYSTEM_WINDOWS is defined, one of the
// __GLIBCPP__ / __GLIBCXX__ macros is defined, and neither STLport macro is.
646 #if defined(SFML_SYSTEM_WINDOWS) && \
647 (defined(__GLIBCPP__) || defined (__GLIBCXX__)) && \
648 !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION))
// This branch converts with mbtowc and makes no visible use of `locale`.
// Exactly one byte is offered to mbtowc and its return value is ignored, so a
// failed conversion leaves `character` at 0.
652 wchar_t character = 0;
653 mbtowc(&character, &input, 1);
654 return static_cast<Uint32
>(character);
// Other branch: widen the character through the std::ctype<wchar_t> facet of
// the supplied locale.
659 const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
662 return static_cast<Uint32
>(facet.widen(input));
// Utf<32>::decodeWide - only the signature survives in this listing; the body
// lines are omitted, so how `input` is mapped to the returned Uint32 cannot be
// stated from here.
669 template <
typename In>
670 Uint32 Utf<32>::decodeWide(In input)
// Utf<32>::encodeAnsi - converts one code point to a narrow character and
// appends it to `output`.
// Two implementations are selected at preprocessing time; the `#else`,
// `#endif`, the declaration of `character` and the return lines are omitted
// from this listing.
683 template <
typename Out>
684 Out Utf<32>::encodeAnsi(Uint32 codepoint, Out output,
char replacement,
const std::locale& locale)
// First branch is taken when SFML_SYSTEM_WINDOWS is defined, one of the
// __GLIBCPP__ / __GLIBCXX__ macros is defined, and neither STLport macro is.
691 #if defined(SFML_SYSTEM_WINDOWS) && \
692 (defined(__GLIBCPP__) || defined (__GLIBCXX__)) && \
693 !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION))
// This branch converts with wctomb and makes no visible use of `locale`.
// A non-negative result emits `character`; on failure `replacement` is emitted
// unless it is 0.
// NOTE(review): `character` looks like a single char (it is written to
// `output` as one element), while wctomb may store up to MB_CUR_MAX bytes at
// the destination - possible overrun in multibyte locales; confirm the
// declaration on the omitted line 697.
698 if (wctomb(&character, static_cast<wchar_t>(codepoint)) >= 0)
699 *output++ = character;
700 else if (replacement)
701 *output++ = replacement;
// Other branch: narrow through the std::ctype<wchar_t> facet of the supplied
// locale; `replacement` is passed as narrow()'s default character.
708 const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
711 *output++ = facet.narrow(static_cast<wchar_t>(codepoint), replacement);
// Utf<32>::encodeWide - converts one code point to a wchar_t and appends it to
// `output`, choosing the strategy from the size of wchar_t.
// NOTE(review): the case labels, braces, breaks and the return are on lines
// omitted from this listing (and the definition may continue past the end of
// this excerpt), so which sizeof value selects which branch is not visible.
720 template <
typename Out>
721 Out Utf<32>::encodeWide(Uint32 codepoint, Out output,
wchar_t replacement)
729 switch (
sizeof(
wchar_t))
// One branch stores the code point unconditionally.
733 *output++ =
static_cast<wchar_t>(codepoint);
// The other branch stores it only when it fits in 16 bits and is outside the
// surrogate block 0xD800..0xDFFF.
739 if ((codepoint <= 0xFFFF) && ((codepoint < 0xD800) || (codepoint > 0xDFFF)))
741 *output++ =
static_cast<wchar_t>(codepoint);
// Otherwise `replacement` is written, unless it is 0, in which case nothing is
// emitted on the visible lines.
743 else if (replacement)
745 *output++ = replacement;