Skip to content

Commit

Permalink
Cygwin: Add ISO C2X functions c8rtomb, mbrtoc8
Browse files Browse the repository at this point in the history
Signed-off-by: Corinna Vinschen <[email protected]>
  • Loading branch information
github-cygwin committed Aug 2, 2023
1 parent 290b56a commit c49bc47
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 7 deletions.
3 changes: 2 additions & 1 deletion winsup/cygwin/include/cygwin/version.h
Original file line number Diff line number Diff line change
Expand Up @@ -482,12 +482,13 @@ details. */
346: (Belatedly) add posix_spawn_file_actions_addchdir_np,
posix_spawn_file_actions_addfchdir_np.
347: Add c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
348: Add c8rtomb, mbrtoc8.
Note that we forgot to bump the api for ualarm, strtoll, strtoull,
sigaltstack, sethostname. */

#define CYGWIN_VERSION_API_MAJOR 0
#define CYGWIN_VERSION_API_MINOR 346
#define CYGWIN_VERSION_API_MINOR 348

/* There is also a compatibility version number associated with the shared memory
regions. It is incremented when incompatible changes are made to the shared
Expand Down
14 changes: 12 additions & 2 deletions winsup/cygwin/include/uchar.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@
#include <sys/cdefs.h>
#include <wchar.h>

typedef __uint16_t char16_t;
typedef __uint32_t char32_t;
/* Either C2x or if C++ doesn't already define char8_t */
#if __ISO_C_VISIBLE >= 2020 && !defined (__cpp_char8_t)
typedef unsigned char char8_t;
#endif

/* C++11 already defines those types. */
#if !defined (__cplusplus) || (__cplusplus - 0 < 201103L)
typedef __uint_least16_t char16_t;
Expand All @@ -14,6 +17,13 @@ typedef __uint_least32_t char32_t;

__BEGIN_DECLS

/* Either C2x or if C++ defines char8_t */
#if __ISO_C_VISIBLE >= 2020 || defined (__cpp_char8_t)
size_t c8rtomb(char * __restrict, char8_t, mbstate_t * __restrict);
size_t mbrtoc8(char8_t * __restrict, const char * __restrict, size_t,
mbstate_t * __restrict);
#endif

size_t c16rtomb(char * __restrict, char16_t, mbstate_t * __restrict);
size_t mbrtoc16(char16_t * __restrict, const char * __restrict, size_t,
mbstate_t * __restrict);
Expand Down
2 changes: 1 addition & 1 deletion winsup/cygwin/release/3.5.0
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ What's new:
- New API calls: posix_spawn_file_actions_addchdir_np,
posix_spawn_file_actions_addfchdir_np.

- New API calls: c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
- New API calls: c8rtomb, c16rtomb, c32rtomb, mbrtoc8, mbrtoc16, mbrtoc32.

What changed:
-------------
Expand Down
166 changes: 166 additions & 0 deletions winsup/cygwin/strfuncs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,103 @@ c16rtomb (char *s, char16_t wc, mbstate_t *ps)
return wcrtomb (s, (wchar_t) wc, ps);
}

extern "C" size_t
c8rtomb (char *s, char8_t c8, mbstate_t *ps)
{
struct _reent *reent = _REENT;
char32_t wc;

if (ps == NULL)
{
_REENT_CHECK_MISC(reent);
ps = &(_REENT_MBRTOWC_STATE(reent));
}

if (s == NULL)
{
ps->__count = 0;
return 1;
}
if ((ps->__count & 0xff00) != 0xc800)
{
switch (c8)
{
case 0 ... 0x7f: /* single octet */
ps->__count = 0;
wc = c8;
break;
case 0xc2 ... 0xf4: /* valid lead byte */
ps->__count = 0xc801;
ps->__value.__wchb[0] = c8;
return 0;
default:
goto ilseq;
}
}
else
{
/* We already collected something... */
int idx = ps->__count & 0x3;
char8_t &c1 = ps->__value.__wchb[0];
char8_t &c2 = ps->__value.__wchb[1];
char8_t &c3 = ps->__value.__wchb[2];

switch (idx)
{
case 1:
/* Annoyingly complex check for validity for 2nd octet. */
if (c8 <= 0x7f || c8 >= 0xc0)
goto ilseq;
if (c1 == 0xe0 && c8 <= 0x9f)
goto ilseq;
if (c1 == 0xed && c8 >= 0xa0)
goto ilseq;
if (c1 == 0xf0 && c8 <= 0x8f)
goto ilseq;
if (c1 == 0xf4 && c8 >= 0x90)
goto ilseq;
if (c1 >= 0xe0)
{
ps->__count = 0xc802;
c2 = c8;
return 0;
}
wc = ((c1 & 0x1f) << 6)
| (c8 & 0x3f);
break;
case 2:
if (c8 <= 0x7f || c8 >= 0xc0)
goto ilseq;
if (c1 >= 0xf0)
{
ps->__count = 0xc803;
c3 = c8;
return 0;
}
wc = ((c1 & 0x0f) << 12)
| ((c2 & 0x3f) << 6)
| (c8 & 0x3f);
break;
case 3:
if (c8 <= 0x7f || c8 >= 0xc0)
goto ilseq;
wc = ((c1 & 0x07) << 18)
| ((c2 & 0x3f) << 12)
| ((c3 & 0x3f) << 6)
| (c8 & 0x3f);
break;
default: /* Shouldn't happen */
goto ilseq;
}
}
ps->__count = 0;
return c32rtomb (s, wc, ps);
ilseq:
ps->__count = 0;
_REENT_ERRNO(reent) = EILSEQ;
return (size_t)(-1);
}

extern "C" size_t
mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
Expand Down Expand Up @@ -245,6 +342,75 @@ mbrtoc16 (char16_t *pwc, const char *s, size_t n, mbstate_t *ps)
return (size_t)(-1);
}

/* Convert the next multibyte character from S (looking at up to N bytes)
   into UTF-8 and hand it out one octet per call.

   The multibyte character is converted with mbrtoc32.  The first UTF-8
   octet is stored in *PC8 (if PC8 is not NULL) and the mbrtoc32 result is
   returned.  Any remaining octets are parked in PS and returned by the
   following calls, each of which consumes no input and returns (size_t)-3.

   State encoding while octets are pending: the 0xff00 bits of PS->__count
   hold the marker 0xc800, the low two bits hold the number of octets still
   to be delivered (1..3), and the octets are kept in PS->__value.__wchb[]
   back to front, so the next octet to deliver is always at index
   "pending - 1".

   Returns 1 when S is NULL (the state is reset), (size_t)-3 when a pending
   octet was delivered, (size_t)-1 or (size_t)-2 as reported by mbrtoc32,
   (size_t)-1 with errno set to EILSEQ if mbrtoc32 produced a value beyond
   0x10ffff, and otherwise the number of bytes mbrtoc32 consumed (0 for the
   null character).  */
extern "C" size_t
mbrtoc8 (char8_t *pc8, const char *s, size_t n, mbstate_t *ps)
{
  struct _reent *reent = _REENT;
  size_t len;
  char32_t wc;

  if (ps == NULL)
    {
      _REENT_CHECK_MISC(reent);
      ps = &(_REENT_MBRTOWC_STATE(reent));
    }

  if (s == NULL)
    {
      /* ps is guaranteed to be non-NULL at this point.  */
      ps->__count = 0;
      return 1;
    }

  if ((ps->__count & 0xff00) == 0xc800)
    {
      int idx = ps->__count & 0x3;

      if (idx > 0)
        {
          /* Deliver the next pending octet.  The state has to advance
             whether or not the caller wants the octet stored, and the
             remaining count must be written back, otherwise 3 and 4 octet
             characters would deliver the same octet over and over.  */
          --idx;
          if (pc8)
            *pc8 = ps->__value.__wchb[idx];
          ps->__count = idx > 0 ? (0xc800 | idx) : 0;
          return (size_t)(-3);
        }
      /* Marker without pending octets is never generated below.  Treat it
         as initial state rather than indexing __wchb[] with -1.  */
      ps->__count = 0;
    }

  len = mbrtoc32 (&wc, s, n, ps);
  /* len is unsigned, so the error (-1) and incomplete (-2) results have to
     be checked explicitly.  In both cases wc is not valid and PS must be
     left exactly as mbrtoc32 set it up.  */
  if (len == (size_t)(-1) || len == (size_t)(-2))
    return len;

  /* octets stored back to front for easier indexing */
  switch (wc)
    {
    case 0 ... 0x7f:
      ps->__value.__wchb[0] = wc;
      ps->__count = 0;
      break;
    case 0x80 ... 0x7ff:
      ps->__value.__wchb[1] = 0xc0 | ((wc & 0x7c0) >> 6);
      ps->__value.__wchb[0] = 0x80 | (wc & 0x3f);
      ps->__count = 0xc800 | 1;
      break;
    case 0x800 ... 0xffff:
      ps->__value.__wchb[2] = 0xe0 | ((wc & 0xf000) >> 12);
      ps->__value.__wchb[1] = 0x80 | ((wc & 0xfc0) >> 6);
      ps->__value.__wchb[0] = 0x80 | (wc & 0x3f);
      ps->__count = 0xc800 | 2;
      break;
    case 0x10000 ... 0x10ffff:
      ps->__value.__wchb[3] = 0xf0 | ((wc & 0x1c0000) >> 18);
      ps->__value.__wchb[2] = 0x80 | ((wc & 0x3f000) >> 12);
      ps->__value.__wchb[1] = 0x80 | ((wc & 0xfc0) >> 6);
      ps->__value.__wchb[0] = 0x80 | (wc & 0x3f);
      ps->__count = 0xc800 | 3;
      break;
    default:
      ps->__count = 0;
      _REENT_ERRNO(reent) = EILSEQ;
      return (size_t)(-1);
    }

  /* The low bits of __count are the number of octets still pending, which
     is also the index of the lead octet (0 for a single octet, including
     the null character).  */
  if (pc8)
    *pc8 = ps->__value.__wchb[ps->__count & 0x3];
  return len;
}

extern "C" size_t
mbsnrtowci(wint_t *dst, const char **src, size_t nms, size_t len, mbstate_t *ps)
{
Expand Down
6 changes: 3 additions & 3 deletions winsup/doc/new-features.xml
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,12 @@ Add support for GB18030 codeset.
</para></listitem>

<listitem><para>
- New API calls: posix_spawn_file_actions_addchdir_np,
posix_spawn_file_actions_addfchdir_np.
New API calls: posix_spawn_file_actions_addchdir_np,
posix_spawn_file_actions_addfchdir_np.
</para></listitem>

<listitem><para>
- New API calls: c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
New API calls: c8rtomb, c16rtomb, c32rtomb, mbrtoc8, mbrtoc16, mbrtoc32.
</para></listitem>

</itemizedlist>
Expand Down

0 comments on commit c49bc47

Please sign in to comment.