cprover
java_bytecode_convert_class.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: JAVA Bytecode Language Conversion
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
13 
14 #ifdef DEBUG
15 #include <iostream>
16 #endif
17 
18 #include "java_root_class.h"
19 #include "java_types.h"
21 #include "java_bytecode_language.h"
22 
23 #include <util/c_types.h>
24 #include <util/namespace.h>
25 #include <util/std_expr.h>
26 
28 
30 {
31 public:
33  symbol_tablet &_symbol_table,
34  message_handlert &_message_handler,
35  size_t _max_array_length,
36  lazy_methodst& _lazy_methods,
37  lazy_methods_modet _lazy_methods_mode,
38  bool _string_refinement_enabled,
39  const character_refine_preprocesst &_character_preprocess):
40  messaget(_message_handler),
41  symbol_table(_symbol_table),
42  max_array_length(_max_array_length),
43  lazy_methods(_lazy_methods),
44  lazy_methods_mode(_lazy_methods_mode),
45  string_refinement_enabled(_string_refinement_enabled),
46  character_preprocess(_character_preprocess)
47  {
48  }
49 
50  void operator()(const java_bytecode_parse_treet &parse_tree)
51  {
53 
54  if(parse_tree.loading_successful)
55  convert(parse_tree.parsed_class);
56  else if(string_refinement_enabled &&
57  parse_tree.parsed_class.name=="java.lang.String")
59  else
61  }
62 
65 
66 protected:
68  const size_t max_array_length;
73 
74  // conversion
75  void convert(const classt &c);
76  void convert(symbolt &class_symbol, const fieldt &f);
77 
78  void generate_class_stub(const irep_idt &class_name);
79  void add_array_types();
80  void add_string_type();
81 };
82 
84 {
85  std::string qualified_classname="java::"+id2string(c.name);
86  if(symbol_table.has_symbol(qualified_classname))
87  {
88  debug() << "Skip class " << c.name << " (already loaded)" << eom;
89  return;
90  }
91 
92  class_typet class_type;
93 
94  class_type.set_tag(c.name);
95  class_type.set(ID_base_name, c.name);
96  if(c.is_enum)
97  class_type.set(
98  ID_java_enum_static_unwind,
99  std::to_string(c.enum_elements+1));
100 
101  if(!c.extends.empty())
102  {
103  symbol_typet base("java::"+id2string(c.extends));
104  class_type.add_base(base);
105  class_typet::componentt base_class_field;
106  base_class_field.type()=base;
107  base_class_field.set_name("@"+id2string(c.extends));
108  base_class_field.set_base_name("@"+id2string(c.extends));
109  base_class_field.set_pretty_name("@"+id2string(c.extends));
110  class_type.components().push_back(base_class_field);
111  }
112 
113  // interfaces are recorded as bases
114  for(const auto &interface : c.implements)
115  {
116  symbol_typet base("java::"+id2string(interface));
117  class_type.add_base(base);
118  }
119 
120  // produce class symbol
121  symbolt new_symbol;
122  new_symbol.base_name=c.name;
123  new_symbol.pretty_name=c.name;
124  new_symbol.name=qualified_classname;
125  class_type.set(ID_name, new_symbol.name);
126  new_symbol.type=class_type;
127  new_symbol.mode=ID_java;
128  new_symbol.is_type=true;
129 
130  symbolt *class_symbol;
131 
132  // add before we do members
133  if(symbol_table.move(new_symbol, class_symbol))
134  {
135  error() << "failed to add class symbol " << new_symbol.name << eom;
136  throw 0;
137  }
138 
139  // now do fields
140  for(const auto &field : c.fields)
141  convert(*class_symbol, field);
142 
143  // now do methods
144  for(const auto &method : c.methods)
145  {
146  const irep_idt method_identifier=
147  id2string(qualified_classname)+
148  "."+id2string(method.name)+
149  ":"+method.signature;
150  // Always run the lazy pre-stage, as it symbol-table
151  // registers the function.
153  *class_symbol,
154  method_identifier,
155  method,
156  symbol_table);
158  {
159  // Upgrade to a fully-realized symbol now:
161  *class_symbol,
162  method,
163  symbol_table,
167  }
168  else
169  {
170  // Wait for our caller to decide what needs elaborating.
171  lazy_methods[method_identifier]=std::make_pair(class_symbol, &method);
172  }
173  }
174 
175  // is this a root class?
176  if(c.extends.empty())
177  java_root_class(*class_symbol);
178 }
179 
181  const irep_idt &class_name)
182 {
183  class_typet class_type;
184 
185  class_type.set_tag(class_name);
186  class_type.set(ID_base_name, class_name);
187 
188  class_type.set(ID_incomplete_class, true);
189 
190  // produce class symbol
191  symbolt new_symbol;
192  new_symbol.base_name=class_name;
193  new_symbol.pretty_name=class_name;
194  new_symbol.name="java::"+id2string(class_name);
195  class_type.set(ID_name, new_symbol.name);
196  new_symbol.type=class_type;
197  new_symbol.mode=ID_java;
198  new_symbol.is_type=true;
199 
200  symbolt *class_symbol;
201 
202  if(symbol_table.move(new_symbol, class_symbol))
203  {
204  warning() << "stub class symbol " << new_symbol.name
205  << " already exists" << eom;
206  }
207  else
208  {
209  // create the class identifier etc
210  java_root_class(*class_symbol);
211  }
212 }
213 
215  symbolt &class_symbol,
216  const fieldt &f)
217 {
218  typet field_type=java_type_from_string(f.signature);
219 
220  // is this a static field?
221  if(f.is_static)
222  {
223  // Create the symbol; we won't add to the struct type.
224  symbolt new_symbol;
225 
226  new_symbol.is_static_lifetime=true;
227  new_symbol.is_lvalue=true;
228  new_symbol.is_state_var=true;
229  new_symbol.name=id2string(class_symbol.name)+"."+id2string(f.name);
230  new_symbol.base_name=f.name;
231  new_symbol.type=field_type;
232  new_symbol.pretty_name=id2string(class_symbol.pretty_name)+
233  "."+id2string(f.name);
234  new_symbol.mode=ID_java;
235  new_symbol.is_type=false;
236  const namespacet ns(symbol_table);
237  new_symbol.value=
239  field_type,
240  class_symbol.location,
241  ns,
243 
244  // Do we have the static field symbol already?
245  const auto s_it=symbol_table.symbols.find(new_symbol.name);
246  if(s_it!=symbol_table.symbols.end())
247  symbol_table.symbols.erase(s_it); // erase, we stubbed it
248 
249  if(symbol_table.add(new_symbol))
250  assert(false && "failed to add static field symbol");
251  }
252  else
253  {
254  class_typet &class_type=to_class_type(class_symbol.type);
255 
256  class_type.components().push_back(class_typet::componentt());
257  class_typet::componentt &component=class_type.components().back();
258 
259  component.set_name(f.name);
260  component.set_base_name(f.name);
261  component.set_pretty_name(f.name);
262  component.type()=field_type;
263 
264  if(f.is_private)
265  component.set_access(ID_private);
266  else if(f.is_protected)
267  component.set_access(ID_protected);
268  else if(f.is_public)
269  component.set_access(ID_public);
270  else
271  component.set_access(ID_default);
272  }
273 }
274 
276 {
277  const std::string letters="ijsbcfdza";
278 
279  for(const char l : letters)
280  {
281  symbol_typet symbol_type=
282  to_symbol_type(java_array_type(l).subtype());
283 
284  struct_typet struct_type;
285  // we have the base class, java.lang.Object, length and data
286  // of appropriate type
287  struct_type.set_tag(symbol_type.get_identifier());
288 
289  struct_type.components().reserve(3);
291  comp0("@java.lang.Object", symbol_typet("java::java.lang.Object"));
292  struct_type.components().push_back(comp0);
293 
294  struct_typet::componentt comp1("length", java_int_type());
295  struct_type.components().push_back(comp1);
296 
298  comp2("data", pointer_type(java_type_from_char(l)));
299  struct_type.components().push_back(comp2);
300 
301  symbolt symbol;
302  symbol.name=symbol_type.get_identifier();
303  symbol.base_name=symbol_type.get(ID_C_base_name);
304  symbol.is_type=true;
305  symbol.type=struct_type;
306  symbol_table.add(symbol);
307  }
308 }
309 
311  const java_bytecode_parse_treet &parse_tree,
312  symbol_tablet &symbol_table,
313  message_handlert &message_handler,
314  size_t max_array_length,
315  lazy_methodst &lazy_methods,
316  lazy_methods_modet lazy_methods_mode,
317  bool string_refinement_enabled,
318  const character_refine_preprocesst &character_preprocess)
319 {
321  symbol_table,
322  message_handler,
323  max_array_length,
324  lazy_methods,
325  lazy_methods_mode,
326  string_refinement_enabled,
327  character_preprocess);
328 
329  try
330  {
331  java_bytecode_convert_class(parse_tree);
332  return false;
333  }
334 
335  catch(int)
336  {
337  }
338 
339  catch(const char *e)
340  {
342  }
343 
344  catch(const std::string &e)
345  {
347  }
348 
349  return true;
350 }
351 
355 {
356  class_typet string_type;
357  string_type.set_tag("java.lang.String");
358  string_type.components().resize(3);
359  string_type.components()[0].set_name("@java.lang.Object");
360  string_type.components()[0].set_pretty_name("@java.lang.Object");
361  string_type.components()[0].type()=symbol_typet("java::java.lang.Object");
362  string_type.components()[1].set_name("length");
363  string_type.components()[1].set_pretty_name("length");
364  string_type.components()[1].type()=java_int_type();
365  string_type.components()[2].set_name("data");
366  string_type.components()[2].set_pretty_name("data");
367  // Use a pointer-to-unbounded-array instead of a pointer-to-char.
368  // Saves some casting in the string refinement algorithm but may
369  // be unnecessary.
370  string_type.components()[2].type()=pointer_type(
372  string_type.add_base(symbol_typet("java::java.lang.Object"));
373 
374  symbolt string_symbol;
375  string_symbol.name="java::java.lang.String";
376  string_symbol.base_name="java.lang.String";
377  string_symbol.type=string_type;
378  string_symbol.is_type=true;
379 
380  symbol_table.add(string_symbol);
381 
382  // Also add a stub of `String.equals` so that remove-virtual-functions
383  // generates a check for Object.equals vs. String.equals.
384  // No need to fill it in, as pass_preprocess will replace the calls again.
385  symbolt string_equals_symbol;
386  string_equals_symbol.name=
387  "java::java.lang.String.equals:(Ljava/lang/Object;)Z";
388  string_equals_symbol.base_name="java.lang.String.equals";
389  string_equals_symbol.pretty_name="java.lang.String.equals";
390  string_equals_symbol.mode=ID_java;
391 
392  code_typet string_equals_type;
393  string_equals_type.return_type()=java_boolean_type();
394  code_typet::parametert thisparam;
395  thisparam.set_this();
396  thisparam.type()=java_reference_type(symbol_typet(string_symbol.name));
397  code_typet::parametert otherparam;
398  otherparam.type()=java_reference_type(symbol_typet("java::java.lang.Object"));
399  string_equals_type.parameters().push_back(thisparam);
400  string_equals_type.parameters().push_back(otherparam);
401  string_equals_symbol.type=std::move(string_equals_type);
402 
403  symbol_table.add(string_equals_symbol);
404 }
The type of an expression.
Definition: type.h:20
mstreamt & warning()
Definition: message.h:228
irep_idt name
The unique identifier.
Definition: symbol.h:46
void java_bytecode_convert_method_lazy(const symbolt &class_symbol, const irep_idt &method_identifier, const java_bytecode_parse_treet::methodt &m, symbol_tablet &symbol_table)
This creates a method symbol in the symtab, but doesn&#39;t actually perform method conversion just yet...
Linking: Zero Initialization.
character_refine_preprocesst character_preprocess
Base type of functions.
Definition: std_types.h:734
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:296
java_bytecode_parse_treet::fieldt fieldt
void java_root_class(symbolt &class_symbol)
typet java_boolean_type()
Definition: java_types.cpp:59
irep_idt mode
Language mode.
Definition: symbol.h:55
java_bytecode_parse_treet::classt classt
java_bytecode_convert_classt(symbol_tablet &_symbol_table, message_handlert &_message_handler, size_t _max_array_length, lazy_methodst &_lazy_methods, lazy_methods_modet _lazy_methods_mode, bool _string_refinement_enabled, const character_refine_preprocesst &_character_preprocess)
reference_typet java_reference_type(const typet &subtype)
Definition: java_types.cpp:67
typet java_int_type()
Definition: java_types.cpp:19
void set_base_name(const irep_idt &base_name)
Definition: std_types.h:194
void set_name(const irep_idt &name)
Definition: std_types.h:184
exprt value
Initial value of symbol.
Definition: symbol.h:40
const componentst & components() const
Definition: std_types.h:242
irep_idt pretty_name
Language-specific display name.
Definition: symbol.h:58
typet & type()
Definition: expr.h:60
Symbol table entry.This is a symbol in the symbol table, stored in an object of type symbol_tablet...
Definition: symbol.h:33
static mstreamt & eom(mstreamt &m)
Definition: message.h:193
Structure type.
Definition: std_types.h:296
reference_typet java_array_type(const char subtype)
Definition: java_types.cpp:77
bool java_bytecode_convert_class(const java_bytecode_parse_treet &parse_tree, symbol_tablet &symbol_table, message_handlert &message_handler, size_t max_array_length, lazy_methodst &lazy_methods, lazy_methods_modet lazy_methods_mode, bool string_refinement_enabled, const character_refine_preprocesst &character_preprocess)
bool is_static_lifetime
Definition: symbol.h:70
void java_bytecode_convert_method(const symbolt &class_symbol, const java_bytecode_parse_treet::methodt &method, symbol_tablet &symbol_table, message_handlert &message_handler, size_t max_array_length, safe_pointer< ci_lazy_methodst > lazy_methods, const character_refine_preprocesst &character_refine)
An expression denoting infinity.
Definition: std_expr.h:3908
lazy_methods_modet
typet java_type_from_string(const std::string &src)
Definition: java_types.cpp:152
exprt zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns, message_handlert &message_handler)
const symbol_typet & to_symbol_type(const typet &type)
Cast a generic typet to a symbol_typet.
Definition: std_types.h:142
symbolst symbols
Definition: symbol_table.h:57
A reference into the symbol table.
Definition: std_types.h:109
void add_string_type()
Implements the java.lang.String type in the case that we provide an internal implementation.
JAVA Bytecode Language Conversion.
void set_access(const irep_idt &access)
Definition: std_types.h:204
typet java_type_from_char(char t)
Definition: java_types.cpp:111
API to expression classes.
The symbol table.
Definition: symbol_table.h:52
const irep_idt & get(const irep_namet &name) const
Definition: irep.cpp:213
TO_BE_DOCUMENTED.
Definition: namespace.h:62
void set_tag(const irep_idt &tag)
Definition: std_types.h:264
void operator()(const java_bytecode_parse_treet &parse_tree)
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
std::map< irep_idt, lazy_method_valuet > lazy_methodst
bool move(symbolt &symbol, symbolt *&new_symbol)
Move a symbol into the symbol table.
typet type
Type of symbol.
Definition: symbol.h:37
source_locationt location
Source code location of definition of symbol.
Definition: symbol.h:43
mstreamt & debug()
Definition: message.h:253
message_handlert & get_message_handler()
Definition: message.h:127
bool is_state_var
Definition: symbol.h:66
const parameterst & parameters() const
Definition: std_types.h:841
irep_idt base_name
Base (non-scoped) name.
Definition: symbol.h:52
void set_pretty_name(const irep_idt &name)
Definition: std_types.h:214
bool has_symbol(const irep_idt &name) const
Definition: symbol_table.h:87
mstreamt & error()
Definition: message.h:223
arrays with given size
Definition: std_types.h:901
bool is_type
Definition: symbol.h:66
typet java_char_type()
Definition: java_types.cpp:44
JAVA Bytecode Language Conversion.
C++ class type.
Definition: std_types.h:334
bool empty() const
Definition: dstring.h:61
const typet & return_type() const
Definition: std_types.h:831
const irep_idt & get_identifier() const
Definition: std_types.h:126
void generate_class_stub(const irep_idt &class_name)
void add_base(const typet &base)
Definition: std_types.h:375
void set(const irep_namet &name, const irep_idt &value)
Definition: irep.h:214
bool is_lvalue
Definition: symbol.h:71
const class_typet & to_class_type(const typet &type)
Cast a generic typet to a class_typet.
Definition: std_types.h:407