Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Input stream UTF-16 big endian mixed up bytes #144

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 19 additions & 19 deletions runtime/C/src/antlr3inputstream.c
Original file line number Diff line number Diff line change
Expand Up @@ -1365,15 +1365,15 @@ static ANTLR3_UCHAR
antlr3UTF16LABE(pANTLR3_INT_STREAM is, ANTLR3_INT32 la)
{
pANTLR3_INPUT_STREAM input;
UTF32 ch;
UTF32 ch2;
pANTLR3_UCHAR nextChar;
ANTLR3_UCHAR ch;
ANTLR3_UCHAR ch2;
UTF16 *nextChar;

// Find the input interface and where we are currently pointing to
// in the input stream
//
input = ((pANTLR3_INPUT_STREAM) (is->super));
nextChar = (pANTLR3_UCHAR)input->nextChar;
nextChar = input->nextChar;

// If a positive offset then advance forward, else retreat
//
Expand All @@ -1385,8 +1385,8 @@ antlr3UTF16LABE(pANTLR3_INT_STREAM is, ANTLR3_INT32 la)
//
// Next char in Big Endian byte order
//
ch = ((*nextChar) << 8) + *(nextChar+1);
nextChar += 2;
ch = ((*(pANTLR3_UINT8)nextChar) << 8) + *((pANTLR3_UINT8)nextChar+1);
nextChar++;

// If we have a surrogate pair then we need to consume
// a following valid LO surrogate.
Expand All @@ -1399,15 +1399,15 @@ antlr3UTF16LABE(pANTLR3_INT_STREAM is, ANTLR3_INT32 la)
{
// Next character is in big endian byte order
//
ch2 = ((*nextChar) << 8) + *(nextChar+1);
ch2 = ((*(pANTLR3_UINT8)nextChar) << 8) + *((pANTLR3_UINT8)nextChar+1);

// If it's a valid low surrogate, consume it
//
if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
{
// We consumed one 16 bit character
//
nextChar += 2;
nextChar ++;
}
// Note that we ignore a valid hi surrogate that has no lo surrogate to go with
// it.
Expand All @@ -1431,20 +1431,20 @@ antlr3UTF16LABE(pANTLR3_INT_STREAM is, ANTLR3_INT32 la)
{
// Get the previous 16 bit character
//
ch = ((*nextChar - 2) << 8) + (*nextChar -1);
nextChar -= 2;
ch = ((*((pANTLR3_UINT8)nextChar - 2)) << 8) + (*((pANTLR3_UINT8)nextChar -1));
nextChar --;

// If we found a low surrogate then go back one more character if
// the hi surrogate is there
//
if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
{
ch2 = ((*nextChar - 2) << 8) + (*nextChar -1);
ch2 = ((*((pANTLR3_UINT8)nextChar - 2)) << 8) + (*((pANTLR3_UINT8)nextChar -1));
if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END)
{
// Yes, there is a high surrogate to match it so decrement one more and point to that
//
nextChar -=2;
nextChar --;
}
}
}
Expand All @@ -1454,16 +1454,16 @@ antlr3UTF16LABE(pANTLR3_INT_STREAM is, ANTLR3_INT32 la)
//
// Input buffer size is always in bytes
//
if ( (pANTLR3_UINT8)nextChar >= (((pANTLR3_UINT8)input->data) + input->sizeBuf))
if ( (pANTLR3_UINT8)nextChar >= (((pANTLR3_UINT8)input->data) + input->sizeBuf))
{
return ANTLR3_CHARSTREAM_EOF;
return ANTLR3_CHARSTREAM_EOF;
}
else
{
// Pick up the next 16 bit character (big endian byte order)
//
ch = ((*nextChar) << 8) + *(nextChar+1);
nextChar += 2;
ch = ((*(pANTLR3_UINT8)nextChar) << 8) + *((pANTLR3_UINT8)nextChar+1);
nextChar ++;

// If we have a surrogate pair then we need to consume
// a following valid LO surrogate.
Expand All @@ -1472,11 +1472,11 @@ antlr3UTF16LABE(pANTLR3_INT_STREAM is, ANTLR3_INT32 la)
{
// If the 16 bits following the high surrogate are in the source buffer...
//
if ((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf))
if ((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf))
{
// Next character is in big endian byte order
//
ch2 = ((*nextChar) << 8) + *(nextChar+1);
ch2 = ((*(pANTLR3_UINT8)nextChar) << 8) + *((pANTLR3_UINT8)nextChar+1);

// If it's a valid low surrogate, consume it
//
Expand Down Expand Up @@ -2054,4 +2054,4 @@ antlr3EBCDICLA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la)
//
return e2a[(*((pANTLR3_UINT8)input->nextChar + la - 1))];
}
}
}