* fix wide character literals by implicitly converting from utf-8 (which we assume as the input charset) to utf-32 (which is used by Haiku's wchars)
git-svn-id: file:///srv/svn/repos/haiku/buildtools/trunk@37474 a95241bf-73f2-0310-859d-f6bbb57e9c96
Diff
legacy/gcc/gcc/c-lex.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
legacy/gcc/gcc/cp/lex.c | 207 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
2 files changed, 233 insertions(+), 79 deletions(-)
@@ -126,12 +126,12 @@
static char *extend_token_buffer PROTO((const char *));
static int readescape PROTO((int *));
static void parse_float PROTO((PTR));
/* Do not insert generated code into the source, instead, include it.
This allows us to build gcc automatically even for targets that
need to add or modify the reserved keyword lists. */
#include "c-gperf.h"
/* Return something to represent absolute declarators containing a *.
TARGET is the absolute declarator that the * contains.
TYPE_QUALS is a list of modifiers such as const or volatile
@@ -146,7 +146,7 @@
{
return build1 (INDIRECT_REF, type_quals, target);
}
void
forget_protocol_qualifiers ()
{
@@ -177,7 +177,7 @@
else if (wordlist[i].rid == RID_ONEWAY)
wordlist[i].name = "oneway";
}
char *
init_parse (filename)
char *filename;
@@ -311,7 +311,7 @@
reinit_parse_for_function ()
{
}
void
@@ -356,7 +356,7 @@
break;
}
}
/* Iff C is a carriage return, warn about it - if appropriate -
and return nonzero. */
static int
@@ -493,7 +493,7 @@
return token_buffer + offset;
}
#if defined HANDLE_PRAGMA
static int
@@ -858,7 +858,7 @@
c = GETC();
return c;
}
#ifdef HANDLE_GENERIC_PRAGMAS
/* Handle a #pragma directive.
@@ -906,7 +906,7 @@
}
#endif
#define ENDFILE -1 /* token that represents end-of-file */
/* Read an escape sequence, returning its equivalent as a character,
@@ -1046,7 +1046,7 @@
pedwarn ("unknown escape sequence: `\\' followed by char code 0x%x", c);
return c;
}
void
yyerror (msgid)
const char *msgid;
@@ -1089,7 +1089,7 @@
{ &long_long_unsigned_type_node, 1, 1, 1}
};
#endif
struct pf_args
{
@@ -1103,7 +1103,7 @@
REAL_VALUE_TYPE value;
};
static void
parse_float (data)
PTR data;
@@ -1203,7 +1203,7 @@
warning ("floating point number exceeds range of `double'");
}
}
int
yylex ()
{
@@ -1662,7 +1662,7 @@
imag = args.imag;
type = args.type;
conversion_errno = args.conversion_errno;
#ifdef ERANGE
/* ERANGE is also reported for underflow,
so test the value to distinguish overflow from that. */
@@ -1985,6 +1985,44 @@
#endif
}
#else
if (wide_flag && (c & 0x80) != 0)
  {
    /* C is a non-ASCII byte inside a wide character literal.  Treat it
       as the lead byte of a UTF-8 sequence, decode the whole sequence
       into a single code point and carry on with that value in C.  */
    unsigned int value;
    int len;
    int i;

    /* Classify the lead byte by its high bits.  A two-byte lead is
       110xxxxx, i.e. 0xC0-0xDF, so it must be tested with mask 0xE0;
       switching on (c & 0xF0) would miss 0xD0-0xDF and thereby reject
       every code point in U+0400-U+07FF.  */
    if ((c & 0xE0) == 0xC0)
      {
        len = 2;
        value = c & 0x1F;
      }
    else if ((c & 0xF0) == 0xE0)
      {
        len = 3;
        value = c & 0x0F;
      }
    else if ((c & 0xF8) == 0xF0)
      {
        len = 4;
        value = c & 0x07;
      }
    else
      {
        /* Either a stray continuation byte (10xxxxxx) or a byte that
           can never start a UTF-8 sequence (0xF8-0xFF): drop it.  */
        warning ("Skipping invalid UTF-8 byte in wide char "
                 "literal");
        continue;
      }

    for (i = 1; i < len; ++i)
      {
        c = GETC ();
        if ((c & 0xC0) != 0x80)
          {
            /* Not a continuation byte: hand it back to the input and
               stop decoding.  The bits gathered so far are still used
               as the character value below.  */
            warning ("Encountered invalid UTF-8 sequence in "
                     "wide char literal");
            UNGETC (c);
            break;
          }
        /* Append the six payload bits of the continuation byte.  */
        value = (value << 6) | (c & 0x3F);
      }
    c = value;
  }
#ifdef MAP_CHARACTER
c = MAP_CHARACTER (c);
#endif
@@ -2112,6 +2150,45 @@
continue;
}
c = wc;
}
#else
if (wide_flag && (c & 0x80) != 0)
  {
    /* C is a non-ASCII byte inside a wide string literal.  Treat it as
       the lead byte of a UTF-8 sequence, decode the whole sequence
       into a single code point and carry on with that value in C.  */
    unsigned int value;
    int len;
    int i;

    /* Classify the lead byte by its high bits.  A two-byte lead is
       110xxxxx, i.e. 0xC0-0xDF, so it must be tested with mask 0xE0;
       switching on (c & 0xF0) would miss 0xD0-0xDF and thereby reject
       every code point in U+0400-U+07FF.  */
    if ((c & 0xE0) == 0xC0)
      {
        len = 2;
        value = c & 0x1F;
      }
    else if ((c & 0xF0) == 0xE0)
      {
        len = 3;
        value = c & 0x0F;
      }
    else if ((c & 0xF8) == 0xF0)
      {
        len = 4;
        value = c & 0x07;
      }
    else
      {
        /* Either a stray continuation byte (10xxxxxx) or a byte that
           can never start a UTF-8 sequence (0xF8-0xFF): drop it.  */
        warning ("Skipping invalid UTF-8 byte in wide char "
                 "string literal");
        continue;
      }

    for (i = 1; i < len; ++i)
      {
        c = GETC ();
        if ((c & 0xC0) != 0x80)
          {
            /* Not a continuation byte: hand it back to the input and
               stop decoding.  The bits gathered so far are still used
               as the character value below.  */
            warning ("Encountered invalid UTF-8 sequence in "
                     "wide char string literal");
            UNGETC (c);
            break;
          }
        /* Append the six payload bits of the continuation byte.  */
        value = (value << 6) | (c & 0x3F);
      }
    c = value;
  }
#endif
}
@@ -141,7 +141,7 @@
i.e., opname_tab[PLUS_EXPR] == "+". */
char **opname_tab;
char **assignop_tab;
extern int yychar;
extern YYSTYPE yylval;
@@ -170,7 +170,7 @@
extern int *token_count;
/* Return something to represent absolute declarators containing a *.
TARGET is the absolute declarator that the * contains.
CV_QUALIFIERS is a list of modifiers such as const or volatile
@@ -198,7 +198,7 @@
We return an ADDR_EXPR whose "contents" are TARGET
and whose type is the modifier list. */
tree
make_reference_declarator (cv_qualifiers, target)
tree cv_qualifiers, target;
@@ -239,7 +239,7 @@
TREE_OPERAND (call_declarator, 2) = cv_qualifiers;
TREE_TYPE (call_declarator) = exception_specification;
}
tree ansi_opname[LAST_CPLUS_TREE_CODE];
@@ -285,7 +285,7 @@
else
return opname_tab[i];
}
int interface_only; /* whether or not current file is only for
interface definitions. */
int interface_unknown; /* whether or not we know this class
@@ -309,7 +309,7 @@
Actual allocated length is maxtoken + 2. */
#include "hash.h"
static int ignore_escape_flag = 0;
@@ -330,7 +330,7 @@
push_obstacks_nochange ();
end_temporary_allocation ();
TIME_IDENTIFIER_TIME (time_identifier) = build_int_2 (0, 0);
TIME_IDENTIFIER_FILEINFO (time_identifier)
TIME_IDENTIFIER_FILEINFO (time_identifier)
= build_int_2 (0, 1);
SET_IDENTIFIER_GLOBAL_VALUE (time_identifier, filename_times);
filename_times = time_identifier;
@@ -352,7 +352,7 @@
quiet_flag = old_quiet_flag;
return this_time;
}
/* Table indexed by tree code giving a string containing a character
classifying the tree code. Possibilities are
t, d, s, c, r, <, 1 and 2. See cp/cp-tree.def for details. */
@@ -386,7 +386,7 @@
#include "cp-tree.def"
};
#undef DEFTREECODE
void
@@ -439,7 +439,7 @@
{
header_time = 0;
body_time = my_get_run_time ();
TREE_INT_CST_LOW (TIME_IDENTIFIER_TIME (this_filename_time))
TREE_INT_CST_LOW (TIME_IDENTIFIER_TIME (this_filename_time))
= body_time;
}
}
@@ -894,7 +894,7 @@
current_base_init_list = NULL_TREE;
current_member_init_list = NULL_TREE;
}
#ifdef __GNUC__
__inline
#endif
@@ -988,7 +988,7 @@
int i;
int maxlen = REDUCE_LENGTH;
unsigned *sorted;
if (reduce_count[-1] == 0)
return;
@@ -1044,8 +1044,8 @@
warning ("YYDEBUG not defined.");
#endif
}
/* Functions and data structures for #pragma interface.
`#pragma implementation' means that the main file being compiled
@@ -1084,7 +1084,7 @@
if (flag_alt_external_templates)
{
struct tinst_level *til = tinst_for_decl ();
if (til)
fileinfo = get_time_identifier (til->file);
}
@@ -1169,7 +1169,7 @@
}
return 0;
}
/* Set up the state required to correctly handle the definition of the
inline function whose preparsed state has been saved in PI. */
@@ -1234,7 +1234,7 @@
if (t == 0)
return;
begin_definition_of_inclass_inline (t);
}
@@ -1251,7 +1251,7 @@
{
tree context;
struct pending_inline *i = (struct pending_inline *) TREE_PURPOSE (t);
context = hack_decl_function_context (i->fndecl);
context = hack_decl_function_context (i->fndecl);
if (context)
pop_cp_function_context (context);
i = i->next;
@@ -1526,7 +1526,7 @@
}
else
c = getch ();
while (c != EOF)
{
int this_lineno = lineno;
@@ -1661,7 +1661,7 @@
}
else
c = getch ();
while (c != EOF)
{
int this_lineno = lineno;
@@ -1798,7 +1798,7 @@
else
{
push_obstacks (&inline_text_obstack, &inline_text_obstack);
defarg_fns = tree_cons (current_class_type, decl, defarg_fns);
defarg_fns = tree_cons (current_class_type, decl, defarg_fns);
pop_obstacks ();
}
}
@@ -1839,7 +1839,7 @@
}
yychar = YYEMPTY;
end_input ();
}
}
/* Main function for deferred parsing of default arguments. Called from
the parser. */
@@ -1982,7 +1982,7 @@
fn = grokfield (declarator, declspecs, NULL_TREE, NULL_TREE, NULL_TREE);
}
if (fn == void_type_node)
return fn;
@@ -2033,14 +2033,14 @@
SET_DECL_ARTIFICIAL (fn);
/* [zooey] force functions of local classes into such a mode, that they
will be output even after being inlined. This circumvents a bug that
causes undefined symbols when linking against files using local classes. */
if (DECL_CONTEXT (fn) != NULL_TREE && hack_decl_function_context (fn))
TREE_ADDRESSABLE(fn) = 1;
TREE_ADDRESSABLE(fn) = 1;
return fn;
}
@@ -2099,7 +2099,7 @@
}
clear_anon_tags ();
}
/* If C is not whitespace, return C.
Otherwise skip whitespace and return first nonwhite char read. */
@@ -2159,7 +2159,7 @@
return token_buffer + offset;
}
static int
get_last_nonwhite_on_line ()
{
@@ -2182,7 +2182,7 @@
pragma_getc ()
{
int c;
if (nextchar != EOF)
{
c = nextchar;
@@ -2233,7 +2233,7 @@
linemode = 1;
do
@@ -2276,7 +2276,7 @@
IDENTIFIER_POINTER (yylval.ttype)))
goto skipline;
#endif
#ifdef HANDLE_GENERIC_PRAGMAS
if (handle_generic_pragma (token))
goto skipline;
@@ -2289,7 +2289,7 @@
|| (warn_unknown_pragmas && ! in_system_header))
warning ("ignoring pragma: %s", token_buffer);
}
goto skipline;
}
else if (c == 'd')
@@ -2460,7 +2460,7 @@
strcpy (input_filename, TREE_STRING_POINTER (yylval.ttype));
lineno = l;
GNU_xref_file (input_filename);
if (main_input_filename == 0)
{
struct impl_files *ifiles = impl_file_chain;
@@ -2628,7 +2628,7 @@
for (; pending_lang_change < 0; ++pending_lang_change)
pop_lang_context ();
}
#define ENDFILE -1 /* token that represents end-of-file */
/* Read an escape sequence, returning its equivalent as a character,
@@ -2770,7 +2770,7 @@
{
if (TREE_CODE (DECL_RESULT (decl)) == TYPE_DECL)
return PTYPENAME;
else if (looking_for_template)
else if (looking_for_template)
return PFUNCNAME;
}
if (looking_for_template && really_overloaded_fn (decl))
@@ -2780,7 +2780,7 @@
decl = TREE_VALUE (decl);
for (t = decl; t != NULL_TREE; t = OVL_CHAIN (t))
if (DECL_FUNCTION_TEMPLATE_P (OVL_FUNCTION (t)))
if (DECL_FUNCTION_TEMPLATE_P (OVL_FUNCTION (t)))
return PFUNCNAME;
}
if (TREE_CODE (decl) == NAMESPACE_DECL)
@@ -2923,7 +2923,7 @@
id = lookup_name (token, 0);
return error_mark_node;
}
if (!id)
{
if (current_template_parms)
@@ -3026,7 +3026,7 @@
like local variables, rather than creating TEMPLATE_DECLs for the
local variables and then finding matching instantiations. */
if (current_template_parms
&& (is_overloaded_fn (id)
&& (is_overloaded_fn (id)
/* If it's not going to be around at instantiation time, we
look it up then. This is a hack, and should go when we
really get dependent/independent name lookup right. */
@@ -3034,14 +3034,14 @@
/* Some local VAR_DECLs (such as those for local variables
in member functions of local classes) are built on the
permanent obstack. */
|| (TREE_CODE (id) == VAR_DECL
|| (TREE_CODE (id) == VAR_DECL
&& CP_DECL_CONTEXT (id)
&& TREE_CODE (CP_DECL_CONTEXT (id)) == FUNCTION_DECL)
|| TREE_CODE (id) == PARM_DECL
|| TREE_CODE (id) == RESULT_DECL
|| TREE_CODE (id) == USING_DECL))
id = build_min_nt (LOOKUP_EXPR, token);
return id;
}
@@ -3060,7 +3060,7 @@
id = NULL_TREE;
else
id = BINDING_VALUE (id);
}
}
else
id = IDENTIFIER_GLOBAL_VALUE (token);
if (parsing && yychar == YYEMPTY)
@@ -3146,7 +3146,7 @@
return NULL_TREE;
}
struct pf_args
struct pf_args
{
@@ -3169,11 +3169,11 @@
REAL_VALUE_ATOF may not work any more. */
char *copy = (char *) alloca (args->p - token_buffer + 1);
bcopy (token_buffer, copy, args->p - token_buffer + 1);
while (1)
{
int lose = 0;
switch (args->c)
{
@@ -3182,13 +3182,13 @@
error ("more than one `f' in numeric constant");
fflag = 1;
break;
case 'l': case 'L':
if (lflag)
error ("more than one `l' in numeric constant");
lflag = 1;
break;
case 'i': case 'I':
if (args->imag)
error ("more than one `i' or `j' in numeric constant");
@@ -3196,30 +3196,30 @@
pedwarn ("ANSI C++ forbids imaginary numeric constants");
args->imag = 1;
break;
default:
lose = 1;
}
if (lose)
break;
if (args->p >= token_buffer + maxtoken - 3)
args->p = extend_token_buffer (args->p);
*(args->p++) = args->c;
*(args->p) = 0;
args->c = getch ();
}
/* The second argument, machine_mode, of REAL_VALUE_ATOF
tells the desired precision of the binary result
of decimal-to-binary conversion. */
if (fflag)
{
if (lflag)
error ("both `f' and `l' in floating constant");
args->type = float_type_node;
args->value = REAL_VALUE_ATOF (copy, TYPE_MODE (args->type));
/* A diagnostic is required here by some ANSI C testsuites.
@@ -3436,7 +3436,7 @@
{
/* We are looking at a string. Complain
if the token before the string is no `extern'.
Could cheat some memory by placing this string
on the temporary_, instead of the saveable_
obstack. */
@@ -3812,7 +3812,7 @@
args.c = c;
args.imag = imag;
args.type = type;
if (do_float_handler (parse_float, (PTR) &args))
{
@@ -3831,7 +3831,7 @@
c = args.c;
imag = args.imag;
type = args.type;
#ifdef ERANGE
if (errno == ERANGE && pedantic)
{
@@ -3940,8 +3940,8 @@
<< (i * HOST_BITS_PER_CHAR));
low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
}
yylval.ttype = build_int_2 (low, high);
TREE_TYPE (yylval.ttype) = long_long_unsigned_type_node;
@@ -4095,6 +4095,44 @@
#endif
}
#else
if (wide_flag && (c & 0x80) != 0)
  {
    /* C is a non-ASCII byte inside a wide character literal.  Treat it
       as the lead byte of a UTF-8 sequence, decode the whole sequence
       into a single code point and carry on with that value in C.  */
    unsigned int value;
    int len;
    int i;

    /* Classify the lead byte by its high bits.  A two-byte lead is
       110xxxxx, i.e. 0xC0-0xDF, so it must be tested with mask 0xE0;
       switching on (c & 0xF0) would miss 0xD0-0xDF and thereby reject
       every code point in U+0400-U+07FF.  */
    if ((c & 0xE0) == 0xC0)
      {
        len = 2;
        value = c & 0x1F;
      }
    else if ((c & 0xF0) == 0xE0)
      {
        len = 3;
        value = c & 0x0F;
      }
    else if ((c & 0xF8) == 0xF0)
      {
        len = 4;
        value = c & 0x07;
      }
    else
      {
        /* Either a stray continuation byte (10xxxxxx) or a byte that
           can never start a UTF-8 sequence (0xF8-0xFF): drop it.
           The first string fragment needs its trailing space so the
           concatenated message does not read "wide charliteral".  */
        warning ("Skipping invalid UTF-8 byte in wide char "
                 "literal");
        continue;
      }

    for (i = 1; i < len; ++i)
      {
        c = getch ();
        if ((c & 0xC0) != 0x80)
          {
            /* Not a continuation byte: hand it back to the input and
               stop decoding.  The bits gathered so far are still used
               as the character value below.  */
            warning ("Encountered invalid UTF-8 sequence in "
                     "wide char literal");
            put_back (c);
            break;
          }
        /* Append the six payload bits of the continuation byte.  */
        value = (value << 6) | (c & 0x3F);
      }
    c = value;
  }
#ifdef MAP_CHARACTER
c = MAP_CHARACTER (c);
#endif
@@ -4228,6 +4266,45 @@
continue;
}
c = wc;
}
#else
if (wide_flag && (c & 0x80) != 0)
  {
    /* C is a non-ASCII byte inside a wide string literal.  Treat it as
       the lead byte of a UTF-8 sequence, decode the whole sequence
       into a single code point and carry on with that value in C.  */
    unsigned int value;
    int len;
    int i;

    /* Classify the lead byte by its high bits.  A two-byte lead is
       110xxxxx, i.e. 0xC0-0xDF, so it must be tested with mask 0xE0;
       switching on (c & 0xF0) would miss 0xD0-0xDF and thereby reject
       every code point in U+0400-U+07FF.  */
    if ((c & 0xE0) == 0xC0)
      {
        len = 2;
        value = c & 0x1F;
      }
    else if ((c & 0xF0) == 0xE0)
      {
        len = 3;
        value = c & 0x0F;
      }
    else if ((c & 0xF8) == 0xF0)
      {
        len = 4;
        value = c & 0x07;
      }
    else
      {
        /* Either a stray continuation byte (10xxxxxx) or a byte that
           can never start a UTF-8 sequence (0xF8-0xFF): drop it.  */
        warning ("Skipping invalid UTF-8 byte in wide char "
                 "string literal");
        continue;
      }

    for (i = 1; i < len; ++i)
      {
        c = getch ();
        if ((c & 0xC0) != 0x80)
          {
            /* Not a continuation byte: hand it back to the input and
               stop decoding.  The bits gathered so far are still used
               as the character value below.  */
            warning ("Encountered invalid UTF-8 sequence in "
                     "wide char string literal");
            put_back (c);
            break;
          }
        /* Append the six payload bits of the continuation byte.  */
        value = (value << 6) | (c & 0x3F);
      }
    c = value;
  }
#endif
}
@@ -4754,9 +4831,9 @@
#endif
char buf[1024];
va_list ap;
VA_START (ap, msg);
#ifndef ANSI_PROTOTYPES
msg = va_arg (ap, const char *);
#endif
@@ -4764,7 +4841,7 @@
vsprintf (buf, msg, ap);
error_with_file_and_line (input_filename, lineno, "%s (compiler error)", buf);
}
void
yyerror (string)
const char *string;
@@ -4794,7 +4871,7 @@
error (buf, token_buffer);
}
static int
handle_cp_pragma (pname)
const char *pname;
@@ -4843,14 +4920,14 @@
}
else if (! strcmp (pname, "interface"))
{
tree fileinfo
tree fileinfo
= TIME_IDENTIFIER_FILEINFO (get_time_identifier (input_filename));
char *main_filename = input_filename;
main_filename = file_name_nondirectory (main_filename);
token = real_yylex ();
if (token != END_OF_LINE)
{
if (token != STRING
@@ -4890,7 +4967,7 @@
interface_only = interface_strcmp (main_filename);
if (!optimize || flag_multiple_symbol_spaces)
if (!optimize || flag_multiple_symbol_spaces)
{
#ifdef WORK_AROUND_PRAGMA_INTERFACE_BUG
interface_only = 0;
@@ -4909,7 +4986,7 @@
}
else if (! strcmp (pname, "implementation"))
{
tree fileinfo
tree fileinfo
= TIME_IDENTIFIER_FILEINFO (get_time_identifier (input_filename));
char *main_filename = main_input_filename ? main_input_filename : input_filename;
@@ -4999,7 +5076,7 @@
return TYPE_UNQUALIFIED;
}
#ifdef HANDLE_GENERIC_PRAGMAS
/* Handle a #pragma directive. TOKEN is the type of the word following
@@ -5035,7 +5112,7 @@
default:
handle_pragma_token (token_buffer, NULL_TREE);
}
token = real_yylex ();
}
}