cprover
java_bytecode_language.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module:
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
10 
11 #include <string>
12 
13 #include <util/symbol_table.h>
14 #include <util/suffix.h>
15 #include <util/config.h>
16 #include <util/cmdline.h>
17 #include <util/expr_iterator.h>
19 #include <util/string2int.h>
20 #include <util/invariant.h>
21 #include <json/json_parser.h>
22 
24 
31 #include "java_entry_point.h"
32 #include "java_bytecode_parser.h"
33 #include "java_class_loader.h"
34 #include "java_string_literals.h"
36 #include "java_utils.h"
37 #include "ci_lazy_methods.h"
38 
39 #include "expr2java.h"
40 #include "load_method_by_regex.h"
41 
46 {
47  assume_inputs_non_null=cmd.isset("java-assume-inputs-non-null");
48  string_refinement_enabled = !cmd.isset("no-refine-strings");
50  cmd.isset("java-throw-runtime-exceptions") || // will go away
51  cmd.isset("throw-runtime-exceptions");
52  assert_uncaught_exceptions = !cmd.isset("disable-uncaught-exception-check");
53  throw_assertion_error = cmd.isset("throw-assertion-error");
54  threading_support = cmd.isset("java-threading");
55 
56  if(cmd.isset("java-max-input-array-length")) // will go away
57  {
59  safe_string2size_t(cmd.get_value("java-max-input-array-length"));
60  }
61  if(cmd.isset("max-nondet-array-length"))
62  {
64  safe_string2size_t(cmd.get_value("max-nondet-array-length"));
65  }
66 
67  if(cmd.isset("java-max-input-tree-depth")) // will go away
68  {
70  safe_string2size_t(cmd.get_value("java-max-input-tree-depth"));
71  }
72  if(cmd.isset("max-nondet-tree-depth"))
73  {
75  safe_string2size_t(cmd.get_value("max-nondet-tree-depth"));
76  }
77 
78  if(cmd.isset("string-max-input-length")) // will go away
79  {
81  safe_string2size_t(cmd.get_value("string-max-input-length"));
82  }
83  if(cmd.isset("max-nondet-string-length"))
84  {
86  safe_string2size_t(cmd.get_value("max-nondet-string-length"));
87  }
88 
89  object_factory_parameters.string_printable = cmd.isset("string-printable");
90  if(cmd.isset("java-max-vla-length"))
92  safe_string2size_t(cmd.get_value("java-max-vla-length"));
93  if(cmd.isset("symex-driven-lazy-loading"))
95  else if(!cmd.isset("no-lazy-methods"))
97  else
99 
101  {
102  java_load_classes.insert(
103  java_load_classes.end(),
104  exception_needed_classes.begin(),
106  }
107  if(cmd.isset("java-load-class"))
108  {
109  const auto &values = cmd.get_values("java-load-class");
110  java_load_classes.insert(
111  java_load_classes.end(), values.begin(), values.end());
112  }
113  if(cmd.isset("java-no-load-class"))
114  {
115  const auto &values = cmd.get_values("java-no-load-class");
116  no_load_classes = {values.begin(), values.end()};
117  }
118 
119  const std::list<std::string> &extra_entry_points=
120  cmd.get_values("lazy-methods-extra-entry-point");
121  std::transform(
122  extra_entry_points.begin(),
123  extra_entry_points.end(),
124  std::back_inserter(extra_methods),
126 
127  if(cmd.isset("java-cp-include-files"))
128  {
129  java_cp_include_files=cmd.get_value("java-cp-include-files");
130  // load file list from JSON file
131  if(java_cp_include_files[0]=='@')
132  {
133  jsont json_cp_config;
134  if(parse_json(
135  java_cp_include_files.substr(1),
137  json_cp_config))
138  throw "cannot read JSON input configuration for JAR loading";
139 
140  if(!json_cp_config.is_object())
141  throw "the JSON file has a wrong format";
142  jsont include_files=json_cp_config["jar"];
143  if(!include_files.is_array())
144  throw "the JSON file has a wrong format";
145 
146  // add jars from JSON config file to classpath
147  for(const jsont &file_entry : include_files.array)
148  {
150  file_entry.is_string() && has_suffix(file_entry.value, ".jar"),
151  "classpath entry must be jar filename, but '" + file_entry.value +
152  "' found");
153  config.java.classpath.push_back(file_entry.value);
154  }
155  }
156  }
157  else
159 
161 }
162 
163 std::set<std::string> java_bytecode_languaget::extensions() const
164 {
165  return { "class", "jar" };
166 }
167 
168 void java_bytecode_languaget::modules_provided(std::set<std::string> &)
169 {
170  // modules.insert(translation_unit(parse_path));
171 }
172 
175  std::istream &,
176  const std::string &,
177  std::ostream &)
178 {
179  // there is no preprocessing!
180  return true;
181 }
182 
184  std::istream &,
185  const std::string &path)
186 {
192  {
194 
195  auto get_string_base_classes = [this](const irep_idt &id) {
197  };
198 
199  java_class_loader.set_extra_class_refs_function(get_string_base_classes);
200  }
201 
202  // look at extension
203  if(has_suffix(path, ".class"))
204  {
205  // override main_class
207  }
208  else if(has_suffix(path, ".jar"))
209  {
210  // build an object to potentially limit which classes are loaded
211  java_class_loader_limitt class_loader_limit(
215  {
216  auto manifest=
217  java_class_loader.jar_pool(class_loader_limit, path).get_manifest();
218  std::string manifest_main_class=manifest["Main-Class"];
219 
220  // if the manifest declares a Main-Class line, we got a main class
221  if(manifest_main_class!="")
222  main_class=manifest_main_class;
223  }
224  else
226 
227  // do we have one now?
228  if(main_class.empty())
229  {
230  status() << "JAR file without entry point: loading class files" << eom;
231  java_class_loader.load_entire_jar(class_loader_limit, path);
232  for(const auto &kv : java_class_loader.get_jar_index(path))
233  main_jar_classes.push_back(kv.first);
234  }
235  else
237  }
238  else
239  UNREACHABLE;
240 
241  if(!main_class.empty())
242  {
243  status() << "Java main class: " << main_class << eom;
245  }
246 
247  return false;
248 }
249 
259  const java_bytecode_parse_treet &parse_tree,
260  symbol_tablet &symbol_table)
261 {
262  namespacet ns(symbol_table);
263  for(const auto &method : parse_tree.parsed_class.methods)
264  {
265  for(const java_bytecode_parse_treet::instructiont &instruction :
266  method.instructions)
267  {
268  if(instruction.statement == "getfield" ||
269  instruction.statement == "putfield")
270  {
271  const exprt &fieldref = instruction.args[0];
272  irep_idt class_symbol_id = fieldref.get(ID_class);
273  const symbolt *class_symbol = symbol_table.lookup(class_symbol_id);
274  INVARIANT(
275  class_symbol != nullptr,
276  "all types containing fields should have been loaded");
277 
278  const class_typet *class_type = &to_class_type(class_symbol->type);
279  const irep_idt &component_name = fieldref.get(ID_component_name);
280  while(!class_type->has_component(component_name))
281  {
282  if(class_type->get_bool(ID_incomplete_class))
283  {
284  // Accessing a field of an incomplete (opaque) type.
285  symbolt &writable_class_symbol =
286  symbol_table.get_writeable_ref(class_symbol_id);
287  auto &components =
288  to_struct_type(writable_class_symbol.type).components();
289  components.emplace_back(component_name, fieldref.type());
290  components.back().set_base_name(component_name);
291  components.back().set_pretty_name(component_name);
292  break;
293  }
294  else
295  {
296  // Not present here: check the superclass.
297  INVARIANT(
298  !class_type->bases().empty(),
299  "class '" + id2string(class_symbol->name)
300  + "' (which was missing a field '" + id2string(component_name)
301  + "' referenced from method '" + id2string(method.name)
302  + "') should have an opaque superclass");
303  const symbol_typet &superclass_type =
304  to_symbol_type(class_type->bases().front().type());
305  class_symbol_id = superclass_type.get_identifier();
306  class_type = &to_class_type(ns.follow(superclass_type));
307  }
308  }
309  }
310  }
311  }
312 }
313 
320  const irep_idt &class_id, symbol_tablet &symbol_table)
321 {
322  symbol_typet java_lang_Class("java::java.lang.Class");
323  symbol_exprt symbol_expr(
325  java_lang_Class);
326  if(!symbol_table.has_symbol(symbol_expr.get_identifier()))
327  {
328  symbolt new_class_symbol;
329  new_class_symbol.name = symbol_expr.get_identifier();
330  new_class_symbol.type = symbol_expr.type();
331  INVARIANT(
332  has_prefix(id2string(new_class_symbol.name), "java::"),
333  "class identifier should have 'java::' prefix");
334  new_class_symbol.base_name =
335  id2string(new_class_symbol.name).substr(6);
336  new_class_symbol.mode = ID_java;
337  new_class_symbol.is_lvalue = true;
338  new_class_symbol.is_state_var = true;
339  new_class_symbol.is_static_lifetime = true;
340  symbol_table.add(new_class_symbol);
341  }
342 
343  return symbol_expr;
344 }
345 
361  const exprt &ldc_arg0,
362  symbol_tablet &symbol_table,
363  bool string_refinement_enabled)
364 {
365  if(ldc_arg0.id() == ID_type)
366  {
367  const irep_idt &class_id = ldc_arg0.type().get(ID_identifier);
368  return
370  get_or_create_class_literal_symbol(class_id, symbol_table));
371  }
372  else if(ldc_arg0.id() == ID_java_string_literal)
373  {
374  return
377  ldc_arg0, symbol_table, string_refinement_enabled));
378  }
379  else
380  {
381  INVARIANT(
382  ldc_arg0.id() == ID_constant,
383  "ldc argument should be constant, string literal or class literal");
384  return ldc_arg0;
385  }
386 }
387 
398  java_bytecode_parse_treet &parse_tree,
399  symbol_tablet &symbol_table,
400  bool string_refinement_enabled)
401 {
402  for(auto &method : parse_tree.parsed_class.methods)
403  {
404  for(java_bytecode_parse_treet::instructiont &instruction :
405  method.instructions)
406  {
407  // ldc* instructions are Java bytecode "load constant" ops, which can
408  // retrieve a numeric constant, String literal, or Class literal.
409  if(instruction.statement == "ldc" ||
410  instruction.statement == "ldc2" ||
411  instruction.statement == "ldc_w" ||
412  instruction.statement == "ldc2_w")
413  {
414  INVARIANT(
415  instruction.args.size() != 0,
416  "ldc instructions should have an argument");
417  instruction.args[0] =
419  instruction.args[0],
420  symbol_table,
421  string_refinement_enabled);
422  }
423  }
424  }
425 }
426 
439  symbol_table_baset &symbol_table,
440  const irep_idt &symbol_id,
441  const irep_idt &symbol_basename,
442  const typet &symbol_type,
443  const irep_idt &class_id,
444  bool force_nondet_init)
445 {
446  symbolt new_symbol;
447  new_symbol.is_static_lifetime = true;
448  new_symbol.is_lvalue = true;
449  new_symbol.is_state_var = true;
450  new_symbol.name = symbol_id;
451  new_symbol.base_name = symbol_basename;
452  new_symbol.type = symbol_type;
453  new_symbol.type.set(ID_C_class, class_id);
454  // Public access is a guess; it encourages merging like-typed static fields,
455  // whereas a more restricted visbility would encourage separating them.
456  // Neither is correct, as without the class file we can't know the truth.
457  new_symbol.type.set(ID_C_access, ID_public);
458  new_symbol.pretty_name = new_symbol.name;
459  new_symbol.mode = ID_java;
460  new_symbol.is_type = false;
461  // If pointer-typed, initialise to null and a static initialiser will be
462  // created to initialise on first reference. If primitive-typed, specify
463  // nondeterministic initialisation by setting a nil value.
464  if(symbol_type.id() == ID_pointer && !force_nondet_init)
465  new_symbol.value = null_pointer_exprt(to_pointer_type(symbol_type));
466  else
467  new_symbol.value.make_nil();
468  bool add_failed = symbol_table.add(new_symbol);
469  INVARIANT(
470  !add_failed, "caller should have checked symbol not already in table");
471 }
472 
482  const irep_idt &start_class_id,
483  const symbol_tablet &symbol_table,
484  const class_hierarchyt &class_hierarchy)
485 {
486  // Depth-first search: return the first ancestor with ID_incomplete_class, or
487  // irep_idt() if none found.
488  std::vector<irep_idt> classes_to_check;
489  classes_to_check.push_back(start_class_id);
490 
491  while(!classes_to_check.empty())
492  {
493  irep_idt to_check = classes_to_check.back();
494  classes_to_check.pop_back();
495 
496  // Exclude java.lang.Object because it can
497  if(symbol_table.lookup_ref(to_check).type.get_bool(ID_incomplete_class) &&
498  to_check != "java::java.lang.Object")
499  {
500  return to_check;
501  }
502 
503  const class_hierarchyt::idst &parents =
504  class_hierarchy.class_map.at(to_check).parents;
505  classes_to_check.insert(
506  classes_to_check.end(), parents.begin(), parents.end());
507  }
508 
509  return irep_idt();
510 }
511 
522  const java_bytecode_parse_treet &parse_tree,
523  symbol_table_baset &symbol_table,
524  const class_hierarchyt &class_hierarchy,
525  messaget &log)
526 {
527  namespacet ns(symbol_table);
528  for(const auto &method : parse_tree.parsed_class.methods)
529  {
530  for(const java_bytecode_parse_treet::instructiont &instruction :
531  method.instructions)
532  {
533  if(instruction.statement == "getstatic" ||
534  instruction.statement == "putstatic")
535  {
536  INVARIANT(
537  instruction.args.size() > 0,
538  "get/putstatic should have at least one argument");
539  irep_idt component = instruction.args[0].get_string(ID_component_name);
540  INVARIANT(
541  !component.empty(), "get/putstatic should specify a component");
542  irep_idt class_id = instruction.args[0].get_string(ID_class);
543  INVARIANT(
544  !class_id.empty(), "get/putstatic should specify a class");
545 
546  // The final 'true' parameter here includes interfaces, as they can
547  // define static fields.
550  class_id,
551  component,
552  symbol_table,
553  class_hierarchy,
554  true);
555  if(!referred_component.is_valid())
556  {
557  // Create a new stub global on an arbitrary incomplete ancestor of the
558  // class that was referred to. This is just a guess, but we have no
559  // better information to go on.
560  irep_idt add_to_class_id =
562  class_id, symbol_table, class_hierarchy);
563 
564  // If there are no incomplete ancestors to ascribe the missing field
565  // to, we must have an incomplete model of a class or simply a
566  // version mismatch of some kind. Normally this would be an error, but
567  // our models library currently triggers this error in some cases
568  // (notably java.lang.System, which is missing System.in/out/err).
569  // Therefore for this case we ascribe the missing field to the class
570  // it was directly referenced from, and fall back to initialising the
571  // field in __CPROVER_initialize, rather than try to create or augment
572  // a clinit method for a non-stub class.
573 
574  bool no_incomplete_ancestors = add_to_class_id.empty();
575  if(no_incomplete_ancestors)
576  {
577  add_to_class_id = class_id;
578 
579  // TODO forbid this again once the models library has been checked
580  // for missing static fields.
581  log.warning() << "Stub static field " << component << " found for "
582  << "non-stub type " << class_id << ". In future this "
583  << "will be a fatal error." << messaget::eom;
584  }
585 
586  irep_idt identifier =
587  id2string(add_to_class_id) + "." + id2string(component);
588 
590  symbol_table,
591  identifier,
592  component,
593  instruction.args[0].type(),
594  add_to_class_id,
595  no_incomplete_ancestors);
596  }
597  }
598  }
599  }
600 }
601 
603  symbol_tablet &symbol_table,
604  const std::string &)
605 {
607 
608  java_internal_additions(symbol_table);
609 
612 
613  // Must load java.lang.Object first to avoid stubbing
614  // This ordering could alternatively be enforced by
615  // moving the code below to the class loader.
616  java_class_loadert::parse_tree_with_overridest_mapt::const_iterator it =
617  java_class_loader.get_class_with_overlays_map().find("java.lang.Object");
619  {
620  if(
622  it->second,
623  symbol_table,
629  {
630  return true;
631  }
632  }
633 
634  // first generate a new struct symbol for each class and a new function symbol
635  // for every method
636  for(const auto &class_trees : java_class_loader.get_class_with_overlays_map())
637  {
638  if(class_trees.second.front().parsed_class.name.empty())
639  continue;
640 
641  if(
643  class_trees.second,
644  symbol_table,
650  {
651  return true;
652  }
653  }
654 
655  // Now that all classes have been created in the symbol table we can populate
656  // the class hierarchy:
657  class_hierarchy(symbol_table);
658 
659  // find and mark all implicitly generic class types
660  // this can only be done once all the class symbols have been created
661  for(const auto &c : java_class_loader.get_class_with_overlays_map())
662  {
663  if(c.second.front().parsed_class.name.empty())
664  continue;
665  try
666  {
668  c.second.front().parsed_class.name, symbol_table);
669  }
671  {
673  << "Not marking class " << c.first
674  << " implicitly generic due to missing outer class symbols"
675  << messaget::eom;
676  }
677  }
678 
679  // Infer fields on opaque types based on the method instructions just loaded.
680  // For example, if we don't have bytecode for field x of class A, but we can
681  // see an int-typed getfield instruction referring to it, add that field now.
682  for(auto &class_to_trees : java_class_loader.get_class_with_overlays_map())
683  {
684  for(const java_bytecode_parse_treet &parse_tree : class_to_trees.second)
685  infer_opaque_type_fields(parse_tree, symbol_table);
686  }
687 
688  // Create global variables for constants (String and Class literals) up front.
689  // This means that when running with lazy loading, we will be aware of these
690  // literal globals' existence when __CPROVER_initialize is generated in
691  // `generate_support_functions`.
692  const std::size_t before_constant_globals_size = symbol_table.symbols.size();
693  for(auto &class_to_trees : java_class_loader.get_class_with_overlays_map())
694  {
695  for(java_bytecode_parse_treet &parse_tree : class_to_trees.second)
696  {
698  parse_tree, symbol_table, string_refinement_enabled);
699  }
700  }
701  status() << "Java: added "
702  << (symbol_table.symbols.size() - before_constant_globals_size)
703  << " String or Class constant symbols"
704  << messaget::eom;
705 
706  // For each reference to a stub global (that is, a global variable declared on
707  // a class we don't have bytecode for, and therefore don't know the static
708  // initialiser for), create a synthetic static initialiser (clinit method)
709  // to nondet initialise it.
710  // Note this must be done before making static initialiser wrappers below, as
711  // this makes a Classname.clinit method, then the next pass makes a wrapper
712  // that ensures it is only run once, and that static initialisation happens
713  // in class-graph topological order.
714 
715  {
716  journalling_symbol_tablet symbol_table_journal =
717  journalling_symbol_tablet::wrap(symbol_table);
718  for(auto &class_to_trees : java_class_loader.get_class_with_overlays_map())
719  {
720  for(const java_bytecode_parse_treet &parse_tree : class_to_trees.second)
721  {
723  parse_tree, symbol_table_journal, class_hierarchy, *this);
724  }
725  }
726 
728  symbol_table, symbol_table_journal.get_inserted(), synthetic_methods);
729  }
730 
731  // For each class that will require a static initializer wrapper, create a
732  // function named package.classname::clinit_wrapper, and a corresponding
733  // global tracking whether it has run or not:
735  symbol_table, synthetic_methods, threading_support);
736 
737  // Now incrementally elaborate methods
738  // that are reachable from this entry point.
740  {
741  // ci: context-insensitive.
743  return true;
744  }
746  {
747  journalling_symbol_tablet journalling_symbol_table =
748  journalling_symbol_tablet::wrap(symbol_table);
749 
750  // Convert all synthetic methods:
751  for(const auto &function_id_and_type : synthetic_methods)
752  {
754  function_id_and_type.first, journalling_symbol_table);
755  }
756  // Convert all methods for which we have bytecode now
757  for(const auto &method_sig : method_bytecode)
758  {
759  convert_single_method(method_sig.first, journalling_symbol_table);
760  }
761  // Now convert all newly added string methods
762  for(const auto &fn_name : journalling_symbol_table.get_inserted())
763  {
765  convert_single_method(fn_name, symbol_table);
766  }
767  }
768  // Otherwise our caller is in charge of elaborating methods on demand.
769 
770  // now instrument runtime exceptions
772  symbol_table,
775 
776  // now typecheck all
777  bool res = java_bytecode_typecheck(
779 
780  // now instrument thread-blocks and synchronized methods.
782  {
783  convert_threadblock(symbol_table);
785  }
786 
787  return res;
788 }
789 
791  symbol_tablet &symbol_table)
792 {
794 
797  if(!res.is_success())
798  return res.is_error();
799 
800  // Load the main function into the symbol table to get access to its
801  // parameter names
802  convert_lazy_method(res.main_function.name, symbol_table);
803 
804  // generate the test harness in __CPROVER__start and a call the entry point
805  return java_entry_point(
806  symbol_table,
807  main_class,
814 }
815 
830  symbol_tablet &symbol_table,
831  method_bytecodet &method_bytecode)
832 {
833  const method_convertert method_converter =
834  [this, &symbol_table]
835  (const irep_idt &function_id, ci_lazy_methods_neededt lazy_methods_needed)
836  {
837  return convert_single_method(
838  function_id, symbol_table, std::move(lazy_methods_needed));
839  };
840 
841  ci_lazy_methodst method_gather(
842  symbol_table,
843  main_class,
851 
852  return method_gather(symbol_table, method_bytecode, method_converter);
853 }
854 
855 const select_pointer_typet &
857 {
858  PRECONDITION(pointer_type_selector.get()!=nullptr);
859  return *pointer_type_selector;
860 }
861 
868  std::unordered_set<irep_idt> &methods) const
869 {
870  const std::string cprover_class_prefix = "java::org.cprover.CProver.";
871 
872  // Add all string solver methods to map
874  // Add all concrete methods to map
875  for(const auto &kv : method_bytecode)
876  {
877  const std::string &method_id = id2string(kv.first);
878 
879  // Avoid advertising org.cprover.CProver methods that the Java frontend will
880  // never provide bodies for (java_bytecode_convert_method always leaves them
881  // bodyless with intent for the driver program to stub them):
882  if(has_prefix(method_id, cprover_class_prefix))
883  {
884  std::size_t method_name_end_offset =
885  method_id.find(':', cprover_class_prefix.length());
886  INVARIANT(
887  method_name_end_offset != std::string::npos,
888  "org.cprover.CProver method should have a postfix type descriptor");
889 
890  const std::string method_name =
891  method_id.substr(
892  cprover_class_prefix.length(),
893  method_name_end_offset - cprover_class_prefix.length());
894 
895  if(cprover_methods_to_ignore.count(method_name))
896  continue;
897  }
898  methods.insert(kv.first);
899  }
900  // Add all synthetic methods to map
901  for(const auto &kv : synthetic_methods)
902  methods.insert(kv.first);
903 }
904 
914  const irep_idt &function_id,
915  symbol_table_baset &symtab)
916 {
917  const symbolt &symbol = symtab.lookup_ref(function_id);
918  if(symbol.value.is_not_nil())
919  return;
920 
921  journalling_symbol_tablet symbol_table=
922  journalling_symbol_tablet::wrap(symtab);
923 
924  convert_single_method(function_id, symbol_table);
925 
926  // Instrument runtime exceptions (unless symbol is a stub)
927  if(symbol.value.is_not_nil())
928  {
930  symbol_table,
931  symbol_table.get_writeable_ref(function_id),
934  }
935 
936  // now typecheck this function
939 }
940 
948  const exprt &function_body,
949  optionalt<ci_lazy_methods_neededt> needed_lazy_methods)
950 {
951  if(needed_lazy_methods)
952  {
953  for(const_depth_iteratort it = function_body.depth_cbegin();
954  it != function_body.depth_cend();
955  ++it)
956  {
957  if(it->id() == ID_code)
958  {
959  const auto fn_call = expr_try_dynamic_cast<code_function_callt>(*it);
960  if(!fn_call)
961  continue;
962  // Only support non-virtual function calls for now, if string solver
963  // starts to introduce virtual function calls then we will need to
964  // duplicate the behavior of java_bytecode_convert_method where it
965  // handles the invokevirtual instruction
966  const symbol_exprt &fn_sym =
967  expr_dynamic_cast<symbol_exprt>(fn_call->function());
968  needed_lazy_methods->add_needed_method(fn_sym.get_identifier());
969  }
970  }
971  }
972 }
973 
984  const irep_idt &function_id,
985  symbol_table_baset &symbol_table,
986  optionalt<ci_lazy_methods_neededt> needed_lazy_methods)
987 {
988  const symbolt &symbol = symbol_table.lookup_ref(function_id);
989 
990  // Nothing to do if body is already loaded
991  if(symbol.value.is_not_nil())
992  return false;
993 
994  // Get bytecode for specified function if we have it
996 
997  synthetic_methods_mapt::iterator synthetic_method_it;
998 
999  // Check if have a string solver implementation
1000  if(string_preprocess.implements_function(function_id))
1001  {
1002  symbolt &symbol = symbol_table.get_writeable_ref(function_id);
1003  // Load parameter names from any extant bytecode before filling in body
1004  if(cmb)
1005  {
1007  symbol, cmb->get().method.local_variable_table, symbol_table);
1008  }
1009  // Populate body of the function with code generated by string preprocess
1010  exprt generated_code =
1011  string_preprocess.code_for_function(symbol, symbol_table);
1012  INVARIANT(
1013  generated_code.is_not_nil(), "Couldn't retrieve code for string method");
1014  // String solver can make calls to functions that haven't yet been seen.
1015  // Add these to the needed_lazy_methods collection
1016  notify_static_method_calls(generated_code, needed_lazy_methods);
1017  symbol.value = generated_code;
1018  return false;
1019  }
1020  else if(
1021  (synthetic_method_it = synthetic_methods.find(function_id)) !=
1022  synthetic_methods.end())
1023  {
1024  // Synthetic method (i.e. one generated by the Java frontend and which
1025  // doesn't occur in the source bytecode):
1026  symbolt &symbol = symbol_table.get_writeable_ref(function_id);
1027  switch(synthetic_method_it->second)
1028  {
1030  if(threading_support)
1032  function_id, symbol_table);
1033  else
1034  symbol.value = get_clinit_wrapper_body(function_id, symbol_table);
1035  break;
1037  symbol.value =
1039  function_id,
1040  symbol_table,
1043  break;
1044  }
1045  // Notify lazy methods of static calls made from the newly generated
1046  // function:
1047  notify_static_method_calls(symbol.value, needed_lazy_methods);
1048  return false;
1049  }
1050 
1051  // No string solver or static init wrapper implementation;
1052  // check if have bytecode for it
1053  if(cmb)
1054  {
1056  symbol_table.lookup_ref(cmb->get().class_id),
1057  cmb->get().method,
1058  symbol_table,
1062  std::move(needed_lazy_methods),
1066  return false;
1067  }
1068 
1069  // The return of an opaque function is a source of an otherwise invisible
1070  // instantiation, so here we ensure we've loaded the appropriate classes.
1071  const java_method_typet function_type = to_java_method_type(symbol.type);
1072  if(
1073  const pointer_typet *pointer_return_type =
1074  type_try_dynamic_cast<pointer_typet>(function_type.return_type()))
1075  {
1076  // If the return type is abstract, we won't forcibly instantiate it here
1077  // otherwise this can cause abstract methods to be explictly called
1078  // TODO(tkiley): Arguably no abstract class should ever be added to
1079  // TODO(tkiley): ci_lazy_methods_neededt, but this needs further
1080  // TODO(tkiley): investigation
1081  namespacet ns{symbol_table};
1082  const java_class_typet &underlying_type =
1083  to_java_class_type(ns.follow(pointer_return_type->subtype()));
1084 
1085  if(!underlying_type.is_abstract())
1086  needed_lazy_methods->add_all_needed_classes(*pointer_return_type);
1087  }
1088 
1089  return true;
1090 }
1091 
1093 {
1095  return false;
1096 }
1097 
1099 {
1102  parse_trees.front().output(out);
1103  if(parse_trees.size() > 1)
1104  {
1105  out << "\n\nClass has the following overlays:\n\n";
1106  for(auto parse_tree_it = std::next(parse_trees.begin());
1107  parse_tree_it != parse_trees.end();
1108  ++parse_tree_it)
1109  {
1110  parse_tree_it->output(out);
1111  }
1112  out << "End of class overlays.\n";
1113  }
1114 }
1115 
1116 std::unique_ptr<languaget> new_java_bytecode_language()
1117 {
1118  return util_make_unique<java_bytecode_languaget>();
1119 }
1120 
1122  const exprt &expr,
1123  std::string &code,
1124  const namespacet &ns)
1125 {
1126  code=expr2java(expr, ns);
1127  return false;
1128 }
1129 
1131  const typet &type,
1132  std::string &code,
1133  const namespacet &ns)
1134 {
1135  code=type2java(type, ns);
1136  return false;
1137 }
1138 
1140  const std::string &code,
1141  const std::string &module,
1142  exprt &expr,
1143  const namespacet &ns)
1144 {
1145  #if 0
1146  expr.make_nil();
1147 
1148  // no preprocessing yet...
1149 
1150  std::istringstream i_preprocessed(code);
1151 
1152  // parsing
1153 
1154  java_bytecode_parser.clear();
1155  java_bytecode_parser.filename="";
1156  java_bytecode_parser.in=&i_preprocessed;
1157  java_bytecode_parser.set_message_handler(message_handler);
1158  java_bytecode_parser.grammar=java_bytecode_parsert::EXPRESSION;
1159  java_bytecode_parser.mode=java_bytecode_parsert::GCC;
1160  java_bytecode_scanner_init();
1161 
1162  bool result=java_bytecode_parser.parse();
1163 
1164  if(java_bytecode_parser.parse_tree.items.empty())
1165  result=true;
1166  else
1167  {
1168  expr=java_bytecode_parser.parse_tree.items.front().value();
1169 
1170  result=java_bytecode_convert(expr, "", message_handler);
1171 
1172  // typecheck it
1173  if(!result)
1175  }
1176 
1177  // save some memory
1178  java_bytecode_parser.clear();
1179 
1180  return result;
1181  #endif
1182 
1183  return true; // fail for now
1184 }
1185 
1187 {
1188 }
std::vector< irep_idt > java_load_classes
The type of an expression.
Definition: type.h:22
irep_idt name
The unique identifier.
Definition: symbol.h:43
const std::list< std::string > & get_values(const std::string &option) const
Definition: cmdline.cpp:110
std::string type2java(const typet &type, const namespacet &ns)
Definition: expr2java.cpp:460
void create_static_initializer_wrappers(symbol_tablet &symbol_table, synthetic_methods_mapt &synthetic_methods, const bool thread_safe)
Create static initializer wrappers for all classes that need them.
static exprt get_ldc_result(const exprt &ldc_arg0, symbol_tablet &symbol_table, bool string_refinement_enabled)
Get result of a Java load-constant (ldc) instruction.
optionalt< std::reference_wrapper< const class_method_and_bytecodet > > opt_reft
void java_bytecode_typecheck_updated_symbols(journalling_symbol_tablet &symbol_table, message_handlert &message_handler, bool string_refinement_enabled)
A static initializer wrapper (code of the form if(!already_run) clinit(); already_run = true;) These ...
bool is_object() const
Definition: json.h:49
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
bool is_not_nil() const
Definition: irep.h:173
const symbolt & lookup_ref(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
const jar_indext & get_jar_index(const std::string &jar_path)
static symbol_exprt get_or_create_class_literal_symbol(const irep_idt &class_id, symbol_tablet &symbol_table)
Create if necessary, then return the constant global java.lang.Class symbol for a given class id...
const_depth_iteratort depth_cbegin() const
Definition: expr.cpp:307
Non-graph-based representation of the class hierarchy.
static void generate_constant_global_variables(java_bytecode_parse_treet &parse_tree, symbol_tablet &symbol_table, bool string_refinement_enabled)
Creates global variables for constants mentioned in a given method.
java_class_loadert java_class_loader
std::function< bool(const irep_idt &function_id, ci_lazy_methods_neededt)> method_convertert
Process a pattern to use as a regex for selecting extra entry points for ci_lazy_methodst.
irep_idt mode
Language mode.
Definition: symbol.h:52
main_function_resultt get_main_symbol(const symbol_table_baset &symbol_table, const irep_idt &main_class, message_handlert &message_handler)
Figures out the entry point of the code to verify.
void mark_java_implicitly_generic_class_type(const irep_idt &class_name, symbol_tablet &symbol_table)
Checks if the class is implicitly generic, i.e., it is an inner class of any generic class...
void modules_provided(std::set< std::string > &modules) override
const irep_idt & get_identifier() const
Definition: std_expr.h:128
Definition: json.h:23
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
bool java_bytecode_typecheck(symbol_table_baset &symbol_table, message_handlert &message_handler, bool string_refinement_enabled)
std::size_t safe_string2size_t(const std::string &str, int base)
Definition: string2int.cpp:59
const symbol_typet & to_symbol_type(const typet &type)
Cast a generic typet to a symbol_typet.
Definition: std_types.h:139
The null pointer constant.
Definition: std_expr.h:4518
exprt value
Initial value of symbol.
Definition: symbol.h:37
const componentst & components() const
Definition: std_types.h:245
std::string get_value(char option) const
Definition: cmdline.cpp:45
jar_filet & jar_pool(java_class_loader_limitt &limit, const std::string &filename)
Load jar archive or retrieve from cache if already loaded.
codet get_thread_safe_clinit_wrapper_body(const irep_idt &function_id, symbol_table_baset &symbol_table)
Thread safe version of the static initialiser.
irep_idt pretty_name
Language-specific display name.
Definition: symbol.h:55
const select_pointer_typet & get_pointer_type_selector() const
typet & type()
Definition: expr.h:56
const class_typet & to_class_type(const typet &type)
Cast a generic typet to a class_typet.
Definition: std_types.h:435
Symbol table entry.This is a symbol in the symbol table, stored in an object of type symbol_tablet...
Definition: symbol.h:30
java_string_library_preprocesst string_preprocess
void show_parse(std::ostream &out) override
auto expr_dynamic_cast(TExpr &base) -> typename detail::expr_dynamic_cast_return_typet< T, TExpr >::type
Cast a reference to a generic exprt to a specific derived class.
Definition: expr_cast.h:165
void initialize_conversion_table()
fill maps with correspondence from java method names to conversion functions
static mstreamt & eom(mstreamt &m)
Definition: message.h:272
bool get_bool(const irep_namet &name) const
Definition: irep.cpp:240
configt config
Definition: config.cpp:23
codet get_clinit_wrapper_body(const irep_idt &function_id, symbol_table_baset &symbol_table)
Produces the static initialiser wrapper body for the given function.
static irep_idt get_any_incomplete_ancestor_for_stub_static_field(const irep_idt &start_class_id, const symbol_tablet &symbol_table, const class_hierarchyt &class_hierarchy)
Find any incomplete ancestor of a given class that can have a stub static field attached to it...
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
Definition: json_parser.cpp:16
virtual void convert_lazy_method(const irep_idt &function_id, symbol_table_baset &symbol_table) override
Promote a lazy-converted method (one whose type is known but whose body hasn&#39;t been converted) into a...
bool is_static_lifetime
Definition: symbol.h:67
std::list< java_bytecode_parse_treet > parse_tree_with_overlayst
#define INVARIANT(CONDITION, REASON)
Definition: invariant.h:204
void load_entire_jar(java_class_loader_limitt &, const std::string &jar_path)
object_factory_parameterst object_factory_parameters
mstreamt & warning() const
Definition: message.h:307
bool from_expr(const exprt &expr, std::string &code, const namespacet &ns) override
Formats the given expression in a language-specific way.
Class Hierarchy.
static void create_stub_global_symbols(const java_bytecode_parse_treet &parse_tree, symbol_table_baset &symbol_table, const class_hierarchyt &class_hierarchy, messaget &log)
Search for getstatic and putstatic instructions in a class&#39; bytecode and create stub symbols for any ...
JAVA Bytecode Language Type Checking.
const irep_idt & id() const
Definition: irep.h:259
bool java_bytecode_convert_class(const java_class_loadert::parse_tree_with_overlayst &parse_trees, symbol_tablet &symbol_table, message_handlert &message_handler, size_t max_array_length, method_bytecodet &method_bytecode, java_string_library_preprocesst &string_preprocess, const std::unordered_set< std::string > &no_load_classes)
See class java_bytecode_convert_classt.
static void create_stub_global_symbol(symbol_table_baset &symbol_table, const irep_idt &symbol_id, const irep_idt &symbol_basename, const typet &symbol_type, const irep_idt &class_id, bool force_nondet_init)
Add a stub global symbol to the symbol table, initialising pointer-typed symbols with null and primit...
classpatht classpath
Definition: config.h:155
std::set< std::string > extensions() const override
virtual bool isset(char option) const
Definition: cmdline.cpp:27
static std::string file_to_class_name(const std::string &)
void convert_synchronized_methods(symbol_tablet &symbol_table, message_handlert &message_handler)
Iterate through the symbol table to find and instrument synchronized methods.
A reference into the symbol table.
Definition: std_types.h:110
size_t max_nondet_tree_depth
Maximum depth for object hierarchy on input.
virtual void methods_provided(std::unordered_set< irep_idt > &methods) const override
Provide feedback to language_filest so that when asked for a lazy method, it can delegate to this ins...
resolve_inherited_componentt::inherited_componentt get_inherited_component(const irep_idt &component_class_id, const irep_idt &component_name, const symbol_tablet &symbol_table, const class_hierarchyt &class_hierarchy, bool include_interfaces)
Finds an inherited component (method or field), taking component visibility into account.
Definition: java_utils.cpp:339
The pointer type.
Definition: std_types.h:1435
void get_all_function_names(std::unordered_set< irep_idt > &methods) const
static void notify_static_method_calls(const exprt &function_body, optionalt< ci_lazy_methods_neededt > needed_lazy_methods)
Notify ci_lazy_methods, if present, of any static function calls made by the given function body...
JAVA Bytecode Language Conversion.
bool string_printable
Force string content to be ASCII printable characters when set to true.
virtual void get_language_options(const cmdlinet &) override
Consume options that are java bytecode specific.
fixed_keys_map_wrappert< parse_tree_with_overridest_mapt > get_class_with_overlays_map()
Map from class names to the bytecode parse trees.
void add_jar_file(const std::string &f)
bool typecheck(symbol_tablet &context, const std::string &module) override
void set_java_cp_include_files(const std::string &java_cp_include_files)
nonstd::optional< T > optionalt
Definition: optional.h:35
virtual bool preprocess(std::istream &instream, const std::string &path, std::ostream &outstream) override
ANSI-C preprocessing.
The symbol table.
Definition: symbol_table.h:19
const irep_idt & get(const irep_namet &name) const
Definition: irep.cpp:213
Collect methods needed to be loaded using the lazy method.
TO_BE_DOCUMENTED.
Definition: namespace.h:74
std::vector< load_extra_methodst > extra_methods
codet get_stub_initializer_body(const irep_idt &function_id, symbol_table_baset &symbol_table, const object_factory_parameterst &object_factory_parameters, const select_pointer_typet &pointer_type_selector)
Create the body of a synthetic static initializer (clinit method), which initialise stub globals in t...
void java_bytecode_convert_method(const symbolt &class_symbol, const java_bytecode_parse_treet::methodt &method, symbol_table_baset &symbol_table, message_handlert &message_handler, size_t max_array_length, bool throw_assertion_error, optionalt< ci_lazy_methods_neededt > needed_lazy_methods, java_string_library_preprocesst &string_preprocess, const class_hierarchyt &class_hierarchy, bool threading_support)
std::string id() const override
#define PRECONDITION(CONDITION)
Definition: invariant.h:242
std::unique_ptr< languaget > new_java_bytecode_language()
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
class_mapt class_map
A function call.
Definition: std_code.h:858
const std::unordered_set< std::string > cprover_methods_to_ignore
Methods belonging to the class org.cprover.CProver that should be ignored (not converted), leaving the driver program to stub them if it wishes.
Definition: java_utils.cpp:425
virtual void set_message_handler(message_handlert &_message_handler)
Definition: message.h:148
struct configt::javat java
const typet & follow(const typet &) const
Definition: namespace.cpp:55
synthetic_methods_mapt synthetic_methods
Maps synthetic method names on to the particular type of synthetic method (static initializer...
const struct_typet & to_struct_type(const typet &type)
Cast a generic typet to a struct_typet.
Definition: std_types.h:318
dstringt has one field, an unsigned integer no which is an index into a static table of strings...
Definition: dstring.h:33
Author: Diffblue Ltd.
Operator to return the address of an object.
Definition: std_expr.h:3168
bool language_options_initialized
Definition: language.h:182
const symbolst & symbols
bool to_expr(const std::string &code, const std::string &module, exprt &expr, const namespacet &ns) override
Parses the given string into an expression.
bool from_type(const typet &type, std::string &code, const namespacet &ns) override
Formats the given type in a language-specific way.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool implements_function(const irep_idt &function_id) const
void add_load_classes(const std::vector< irep_idt > &classes)
Adds the list of classes to the load queue, forcing them to be loaded even without explicit reference...
bool has_component(const irep_idt &component_name) const
Definition: std_types.h:255
An exception that is raised checking whether a class is implicitly generic if a symbol for an outer c...
std::vector< irep_idt > idst
const std::unique_ptr< const select_pointer_typet > pointer_type_selector
void create_stub_global_initializer_symbols(symbol_tablet &symbol_table, const std::unordered_set< irep_idt > &stub_globals_set, synthetic_methods_mapt &synthetic_methods)
Create static initializer symbols for each distinct class that has stub globals.
std::unordered_set< std::string > no_load_classes
void java_bytecode_instrument_symbol(symbol_table_baset &symbol_table, symbolt &symbol, const bool throw_runtime_exceptions, message_handlert &message_handler)
Instruments the code attached to symbol with runtime exceptions or corresponding assertions.
typet type
Type of symbol.
Definition: symbol.h:34
std::function< std::vector< irep_idt >const symbol_tablet &symbol_table)> build_load_method_by_regex(const std::string &pattern)
Create a lambda that returns the symbols that the given pattern should be loaded.If the pattern doesn...
message_handlert & get_message_handler()
Definition: message.h:153
bool do_ci_lazy_method_conversion(symbol_tablet &, method_bytecodet &)
Uses a simple context-insensitive (&#39;ci&#39;) analysis to determine which methods may be reachable from th...
const java_method_typet & to_java_method_type(const typet &type)
Definition: java_types.h:289
bool is_string() const
Definition: json.h:39
size_t max_nondet_array_length
Maximum value for the non-deterministically-chosen length of an array.
bool is_array() const
Definition: json.h:54
std::string value
Definition: json.h:137
mstreamt & result() const
Definition: message.h:312
mstreamt & status() const
Definition: message.h:317
void set_extra_class_refs_function(get_extra_class_refs_functiont func)
Sets a function that provides extra dependencies for a particular class.
exprt code_for_function(const symbolt &symbol, symbol_table_baset &symbol_table)
Should be called to provide code for string functions that are used in the code but for which no impl...
Base class for all expressions.
Definition: expr.h:42
bool is_state_var
Definition: symbol.h:63
static void infer_opaque_type_fields(const java_bytecode_parse_treet &parse_tree, symbol_tablet &symbol_table)
Infer fields that must exist on opaque types from field accesses against them.
size_t max_nondet_string_length
Maximum value for the non-deterministically-chosen length of a string.
bool parse(std::istream &instream, const std::string &path) override
irep_idt base_name
Base (non-scoped) name.
Definition: symbol.h:49
The symbol table base class interface.
const std::vector< std::string > exception_needed_classes
bool is_abstract() const
Definition: std_types.h:419
#define JAVA_CLASS_MODEL_SUFFIX
#define UNREACHABLE
Definition: invariant.h:271
const_depth_iteratort depth_cend() const
Definition: expr.cpp:309
symbol_exprt get_or_create_string_literal_symbol(const exprt &string_expr, symbol_table_baset &symbol_table, bool string_refinement_enabled)
Creates or gets an existing constant global symbol for a given string literal.
const basest & bases() const
Definition: std_types.h:386
void make_nil()
Definition: irep.h:315
void convert_threadblock(symbol_tablet &symbol_table)
Iterate through the symbol table to find and appropriately instrument thread-blocks.
void java_internal_additions(symbol_table_baset &dest)
Expression to hold a symbol (variable)
Definition: std_expr.h:90
A generated (synthetic) static initializer function for a stub type.
bool has_suffix(const std::string &s, const std::string &suffix)
Definition: suffix.h:15
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
const pointer_typet & to_pointer_type(const typet &type)
Cast a generic typet to a pointer_typet.
Definition: std_types.h:1459
Author: Diffblue Ltd.
lazy_methods_modet lazy_methods_mode
irep_idt main_class
Definition: config.h:156
dstringt irep_idt
Definition: irep.h:32
bool generate_support_functions(symbol_tablet &symbol_table) override
Create language-specific support functions, such as __CPROVER_start, __CPROVER_initialize and languag...
opt_reft get(const irep_idt &method_id)
void java_bytecode_instrument(symbol_tablet &symbol_table, const bool throw_runtime_exceptions, message_handlert &message_handler)
Instruments all the code in the symbol_table with runtime exceptions or corresponding assertions...
bool is_type
Definition: symbol.h:63
JAVA Bytecode Language Conversion.
#define DATA_INVARIANT(CONDITION, REASON)
Definition: invariant.h:278
C++ class type.
Definition: std_types.h:341
arrayt array
Definition: json.h:129
void java_bytecode_initialize_parameter_names(symbolt &method_symbol, const java_bytecode_parse_treet::methodt::local_variable_tablet &local_variable_table, symbol_table_baset &symbol_table)
This uses a cut-down version of the logic in java_bytecode_convert_methodt::convert to initialize sym...
stub_global_initializer_factoryt stub_global_initializer_factory
const symbolt * lookup(const irep_idt &name) const
Find a symbol in the symbol table for read-only access.
message_handlert * message_handler
Definition: message.h:342
bool empty() const
Definition: dstring.h:73
std::unordered_map< std::string, std::string > get_manifest()
Get contents of the Manifest file in the jar archive.
Definition: jar_file.cpp:98
const typet & return_type() const
Definition: std_types.h:895
const java_class_typet & to_java_class_type(const typet &type)
Definition: java_types.h:227
virtual bool final(symbol_table_baset &context) override
Final adjustments, e.g.
std::vector< irep_idt > main_jar_classes
std::vector< irep_idt > get_string_type_base_classes(const irep_idt &class_name)
Gets the base classes for known String and String-related types, or returns an empty list for other t...
void set(const irep_namet &name, const irep_idt &value)
Definition: irep.h:286
std::string expr2java(const exprt &expr, const namespacet &ns)
Definition: expr2java.cpp:453
bool java_entry_point(symbol_table_baset &symbol_table, const irep_idt &main_class, message_handlert &message_handler, bool assume_init_pointers_not_null, bool assert_uncaught_exceptions, const object_factory_parameterst &object_factory_parameters, const select_pointer_typet &pointer_type_selector, bool string_refinement_enabled)
Given the symbol_table and the main_class to test, this function generates a new function __CPROVER__...
void convert_single_method(const irep_idt &function_id, symbol_table_baset &symbol_table)
auto expr_try_dynamic_cast(TExpr &base) -> typename detail::expr_try_dynamic_cast_return_typet< T, TExpr >::type
Try to cast a reference to a generic exprt to a specific derived class.
Definition: expr_cast.h:91
bool is_lvalue
Definition: symbol.h:68