cprover
java_bytecode_typecheck_expr.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: JAVA Bytecode Conversion / Type Checking
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
13 
14 #include <iomanip>
15 
16 #include <util/std_expr.h>
17 #include <util/prefix.h>
18 #include <util/arith_tools.h>
19 #include <util/unicode.h>
20 
22 
23 #include "java_pointer_casts.h"
24 #include "java_types.h"
25 
27 {
28  if(expr.id()==ID_code)
29  return typecheck_code(to_code(expr));
30 
31  if(expr.id()==ID_typecast && expr.type().id()==ID_pointer)
32  expr=make_clean_pointer_cast(expr, expr.type(), ns);
33 
34  // do operands recursively
35  Forall_operands(it, expr)
36  typecheck_expr(*it);
37 
38  if(expr.id()==ID_symbol)
40  else if(expr.id()==ID_side_effect)
41  {
42  const irep_idt &statement=to_side_effect_expr(expr).get_statement();
43  if(statement==ID_java_new)
45  else if(statement==ID_java_new_array)
47  }
48  else if(expr.id()==ID_java_string_literal)
50  else if(expr.id()==ID_member)
52 }
53 
55 {
56  assert(expr.operands().empty());
57  typet &type=expr.type();
58  typecheck_type(type);
59 }
60 
62  side_effect_exprt &expr)
63 {
64  assert(expr.operands().size()>=1); // one per dimension
65  typet &type=expr.type();
66  typecheck_type(type);
67 }
68 
69 static std::string escape_non_alnum(const std::string &toescape)
70 {
71  std::ostringstream escaped;
72  for(auto &ch : toescape)
73  {
74  if(ch=='_')
75  escaped << "__";
76  else if(isalnum(ch))
77  escaped << ch;
78  else
79  escaped << '_'
80  << std::hex
81  << std::setfill('0')
82  << std::setw(2)
83  << (unsigned int)ch;
84  }
85  return escaped.str();
86 }
87 
91 static array_exprt utf16_to_array(const std::wstring &in)
92 {
93  const auto jchar=java_char_type();
95  for(const auto c : in)
96  ret.copy_to_operands(from_integer(c, jchar));
97  return ret;
98 }
99 
101 {
102  const irep_idt value=expr.get(ID_value);
103  const symbol_typet string_type("java::java.lang.String");
104 
105  std::string escaped_symbol_name=
106  "java::java.lang.String.Literal.";
107  escaped_symbol_name+=escape_non_alnum(id2string(value));
108 
109  auto findit=symbol_table.symbols.find(escaped_symbol_name);
110  if(findit!=symbol_table.symbols.end())
111  {
112  expr=address_of_exprt(findit->second.symbol_expr());
113  return;
114  }
115 
116  // Create a new symbol:
117  symbolt new_symbol;
118  new_symbol.name=escaped_symbol_name;
119  new_symbol.type=string_type;
120  new_symbol.base_name="Literal";
121  new_symbol.pretty_name=value;
122  new_symbol.mode=ID_java;
123  new_symbol.is_type=false;
124  new_symbol.is_lvalue=true;
125  new_symbol.is_static_lifetime=true; // These are basically const global data.
126 
127  // Regardless of string refinement setting, at least initialize
128  // the literal with @clsid = String and @lock = false:
129  symbol_typet jlo_symbol("java::java.lang.Object");
130  const auto &jlo_struct=to_struct_type(ns.follow(jlo_symbol));
131  struct_exprt jlo_init(jlo_symbol);
132  const auto &jls_struct=to_struct_type(ns.follow(string_type));
133 
134  jlo_init.copy_to_operands(
136  "java::java.lang.String",
137  jlo_struct.components()[0].type()));
138  jlo_init.copy_to_operands(
139  from_integer(
140  0,
141  jlo_struct.components()[1].type()));
142 
143  // If string refinement *is* around, populate the actual
144  // contents as well:
146  {
147  struct_exprt literal_init(new_symbol.type);
148  literal_init.move_to_operands(jlo_init);
149 
150  // Initialize the string with a constant utf-16 array:
151  symbolt array_symbol;
152  array_symbol.name=escaped_symbol_name+"_constarray";
153  array_symbol.type=array_typet(
155  array_symbol.base_name="Literal_constarray";
156  array_symbol.pretty_name=value;
157  array_symbol.mode=ID_java;
158  array_symbol.is_type=false;
159  array_symbol.is_lvalue=true;
160  // These are basically const global data:
161  array_symbol.is_static_lifetime=true;
162  array_symbol.is_state_var=true;
163  auto literal_array=utf16_to_array(
165  array_symbol.value=literal_array;
166 
167  if(symbol_table.add(array_symbol))
168  throw "failed to add constarray symbol to symbol table";
169 
170  literal_init.copy_to_operands(
171  from_integer(literal_array.operands().size(),
172  jls_struct.components()[1].type()));
173  literal_init.copy_to_operands(
174  address_of_exprt(array_symbol.symbol_expr()));
175 
176  new_symbol.value=literal_init;
177  }
178  else if(jls_struct.components().size()>=1 &&
179  jls_struct.components()[0].get_name()=="@java.lang.Object")
180  {
181  // Case where something defined java.lang.String, so it has
182  // a proper base class (always java.lang.Object in practical
183  // JDKs seen so far)
184  struct_exprt literal_init(new_symbol.type);
185  literal_init.move_to_operands(jlo_init);
186  for(const auto &comp : jls_struct.components())
187  {
188  if(comp.get_name()=="@java.lang.Object")
189  continue;
190  // Other members of JDK's java.lang.String we don't understand
191  // without string-refinement. Just zero-init them; consider using
192  // test-gen-like nondet object trees instead.
193  literal_init.copy_to_operands(
194  zero_initializer(comp.type(), expr.source_location(), ns));
195  }
196  new_symbol.value=literal_init;
197  }
198  else if(jls_struct.get_bool(ID_incomplete_class))
199  {
200  // Case where java.lang.String was stubbed, and so directly defines
201  // @class_identifier and @lock:
202  new_symbol.value=jlo_init;
203  }
204 
205  if(symbol_table.add(new_symbol))
206  {
207  error() << "failed to add string literal symbol to symbol table" << eom;
208  throw 0;
209  }
210 
211  expr=address_of_exprt(new_symbol.symbol_expr());
212 }
213 
215 {
216  irep_idt identifier=expr.get_identifier();
217 
218  // does it exist already in the destination symbol table?
219  symbol_tablet::symbolst::const_iterator s_it=
220  symbol_table.symbols.find(identifier);
221 
222  if(s_it==symbol_table.symbols.end())
223  {
224  assert(has_prefix(id2string(identifier), "java::"));
225 
226  // no, create the symbol
227  symbolt new_symbol;
228  new_symbol.name=identifier;
229  new_symbol.type=expr.type();
230  new_symbol.base_name=expr.get(ID_C_base_name);
231  new_symbol.pretty_name=id2string(identifier).substr(6, std::string::npos);
232  new_symbol.mode=ID_java;
233  new_symbol.is_type=false;
234 
235  if(new_symbol.type.id()==ID_code)
236  {
237  new_symbol.is_lvalue=false;
238  }
239  else
240  {
241  new_symbol.is_lvalue=true;
242  }
243 
244  if(symbol_table.add(new_symbol))
245  {
246  error() << "failed to add expression symbol to symbol table" << eom;
247  throw 0;
248  }
249  }
250  else
251  {
252  // yes!
253  assert(!s_it->second.is_type);
254 
255  const symbolt &symbol=s_it->second;
256 
257  // type the expression
258  expr.type()=symbol.type;
259  }
260 }
261 
263 {
264  // The member might be in a parent class or an opaque class, which we resolve
265  // here.
266  const irep_idt component_name=expr.get_component_name();
267 
268  while(1)
269  {
270  typet base_type(ns.follow(expr.struct_op().type()));
271 
272  if(base_type.id()!=ID_struct)
273  break; // give up
274 
275  struct_typet &struct_type=
277 
278  if(struct_type.has_component(component_name))
279  return; // done
280 
281  // look at parent
282  struct_typet::componentst &components=
283  struct_type.components();
284 
285  if(struct_type.get_bool(ID_incomplete_class))
286  {
287  // member doesn't exist. In this case struct_type should be an opaque
288  // stub, and we'll add the member to it.
289  symbolt &symbol_table_type=
290  symbol_table.lookup("java::"+id2string(struct_type.get_tag()));
291  auto &add_to_components=
292  to_struct_type(symbol_table_type.type).components();
293  add_to_components
294  .push_back(struct_typet::componentt(component_name, expr.type()));
295  add_to_components.back().set_base_name(component_name);
296  add_to_components.back().set_pretty_name(component_name);
297  return;
298  }
299 
300  if(components.empty())
301  break;
302 
303  const struct_typet::componentt &c=components.front();
304 
305  member_exprt m(expr.struct_op(), c.get_name(), c.type());
307 
308  expr.struct_op()=m;
309  }
310 
312  warning() << "failed to find field `"
313  << component_name << "` in class hierarchy" << eom;
314 
315  // We replace by a non-det of same type
316  side_effect_expr_nondett nondet(expr.type());
317  expr.swap(nondet);
318 }
const irep_idt & get_name() const
Definition: std_types.h:179
The type of an expression.
Definition: type.h:20
mstreamt & warning()
Definition: message.h:228
irep_idt name
The unique identifier.
Definition: symbol.h:46
const typet & follow(const typet &src) const
Definition: namespace.cpp:66
symbolt & lookup(const irep_idt &identifier)
Find a symbol in the symbol table.
Linking: Zero Initialization.
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
static std::string escape_non_alnum(const std::string &toescape)
irep_idt mode
Language mode.
Definition: symbol.h:55
typet java_int_type()
Definition: java_types.cpp:19
void copy_to_operands(const exprt &expr)
Definition: expr.cpp:61
const irep_idt & get_identifier() const
Definition: std_expr.h:120
std::vector< componentt > componentst
Definition: std_types.h:240
const member_exprt & to_member_expr(const exprt &expr)
Cast a generic exprt to a member_exprt.
Definition: std_expr.h:3302
void move_to_operands(exprt &expr)
Definition: expr.cpp:28
exprt value
Initial value of symbol.
Definition: symbol.h:40
const componentst & components() const
Definition: std_types.h:242
static array_exprt utf16_to_array(const std::wstring &in)
Convert UCS-2 or UTF-16 to an array expression.
irep_idt pretty_name
Language-specific display name.
Definition: symbol.h:58
typet & type()
Definition: expr.h:60
Symbol table entry.This is a symbol in the symbol table, stored in an object of type symbol_tablet...
Definition: symbol.h:33
A constant literal expression.
Definition: std_expr.h:3685
static mstreamt & eom(mstreamt &m)
Definition: message.h:193
Structure type.
Definition: std_types.h:296
bool get_bool(const irep_namet &name) const
Definition: irep.cpp:240
bool is_static_lifetime
Definition: symbol.h:70
side_effect_exprt & to_side_effect_expr(exprt &expr)
Definition: std_code.h:1023
Extract member of struct or union.
Definition: std_expr.h:3214
JAVA Bytecode Language Type Checking.
const irep_idt & id() const
Definition: irep.h:189
An expression denoting infinity.
Definition: std_expr.h:3908
class symbol_exprt symbol_expr() const
produces a symbol_exprt for a symbol
Definition: symbol.cpp:191
exprt zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns, message_handlert &message_handler)
symbolst symbols
Definition: symbol_table.h:57
A reference into the symbol table.
Definition: std_types.h:109
source_locationt source_location
Definition: message.h:175
API to expression classes.
const irep_idt & get(const irep_namet &name) const
Definition: irep.cpp:213
A side effect that returns a non-deterministically chosen value.
Definition: std_code.h:1037
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
void typecheck_expr_java_new_array(side_effect_exprt &)
virtual void typecheck_expr(exprt &expr)
Operator to return the address of an object.
Definition: std_expr.h:2593
bool has_component(const irep_idt &component_name) const
Definition: std_types.h:252
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
const struct_typet & to_struct_type(const typet &type)
Cast a generic typet to a struct_typet.
Definition: std_types.h:317
std::wstring utf8_to_utf16_little_endian(const std::string &in)
Definition: unicode.cpp:283
typet type
Type of symbol.
Definition: symbol.h:37
Base class for all expressions.
Definition: expr.h:46
bool is_state_var
Definition: symbol.h:66
const exprt & struct_op() const
Definition: std_expr.h:3270
irep_idt base_name
Base (non-scoped) name.
Definition: symbol.h:52
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast a generic exprt to a symbol_exprt.
Definition: std_expr.h:202
JAVA Pointer Casts.
const source_locationt & source_location() const
Definition: expr.h:142
irep_idt get_component_name() const
Definition: std_expr.h:3249
void base_type(typet &type, const namespacet &ns)
Definition: base_type.cpp:79
void swap(irept &irep)
Definition: irep.h:231
#define Forall_operands(it, expr)
Definition: expr.h:23
mstreamt & error()
Definition: message.h:223
source_locationt & add_source_location()
Definition: expr.h:147
const codet & to_code(const exprt &expr)
Definition: std_code.h:49
arrays with given size
Definition: std_types.h:901
Expression to hold a symbol (variable)
Definition: std_expr.h:82
bool is_type
Definition: symbol.h:66
typet java_char_type()
Definition: java_types.cpp:44
irep_idt get_tag() const
Definition: std_types.h:263
An expression containing a side effect.
Definition: std_code.h:997
void typecheck_expr_java_new(side_effect_exprt &)
operandst & operands()
Definition: expr.h:70
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
struct constructor from list of elements
Definition: std_expr.h:1464
array constructor from list of elements
Definition: std_expr.h:1309
exprt make_clean_pointer_cast(const exprt &rawptr, const typet &target_type, const namespacet &ns)
const irep_idt & get_statement() const
Definition: std_code.h:1012
bool is_lvalue
Definition: symbol.h:71