cprover
linker_script_merge.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Linker Script Merging
4 
5 Author: Kareem Khazem <karkhaz@karkhaz.com>, 2017
6 
7 \*******************************************************************/
8 
9 #include "linker_script_merge.h"
10 
11 #include <algorithm>
12 #include <fstream>
13 #include <iterator>
14 
15 #include <util/arith_tools.h>
16 #include <util/c_types.h>
17 #include <util/expr_initializer.h>
18 #include <util/magic.h>
19 #include <util/run.h>
20 #include <util/tempfile.h>
21 
22 #include <json/json_parser.h>
23 
25 
27 
29 {
30  if(!cmdline.isset('T'))
31  return 0;
32 
33  temporary_filet linker_def_outfile("goto-cc-linker-info", ".json");
34  std::list<irep_idt> linker_defined_symbols;
35  int fail=
37  linker_defined_symbols,
39  elf_binary,
40  linker_def_outfile());
41  // ignore linker script parsing failures until the code is tested more widely
42  if(fail!=0)
43  return 0;
44 
45  jsont data;
46  fail=parse_json(linker_def_outfile(), get_message_handler(), data);
47  if(fail!=0)
48  {
49  error() << "Problem parsing linker script JSON data" << eom;
50  return fail;
51  }
52 
54  if(fail!=0)
55  {
56  error() << "Malformed linker-script JSON document" << eom;
57  data.output(error());
58  return fail;
59  }
60 
61  symbol_tablet original_st;
62  goto_functionst original_gf;
63 
64  fail=read_goto_binary(goto_binary, original_st, original_gf,
66 
67  if(fail!=0)
68  {
69  error() << "Unable to read goto binary for linker script merging" << eom;
70  return fail;
71  }
72 
73  fail=1;
74  linker_valuest linker_values;
75  const auto &pair=original_gf.function_map.find(INITIALIZE_FUNCTION);
76  if(pair==original_gf.function_map.end())
77  {
78  error() << "No " << INITIALIZE_FUNCTION << " found in goto_functions"
79  << eom;
80  return fail;
81  }
83  data,
84  cmdline.get_value('T'),
85  pair->second.body,
86  original_st,
87  linker_values);
88  if(fail!=0)
89  {
90  error() << "Could not add linkerscript defs to " INITIALIZE_FUNCTION << eom;
91  return fail;
92  }
93 
94  fail=goto_and_object_mismatch(linker_defined_symbols, linker_values);
95  if(fail!=0)
96  return fail;
97 
98  // The keys of linker_values are exactly the elements of
99  // linker_defined_symbols, so iterate over linker_values from now on.
100 
101  fail=pointerize_linker_defined_symbols(original_gf, original_st,
102  linker_values);
103 
104  if(fail!=0)
105  {
106  error() << "Could not pointerize all linker-defined expressions" << eom;
107  return fail;
108  }
109 
110  fail=compiler.write_object_file(goto_binary, original_st, original_gf);
111 
112  if(fail!=0)
113  error() << "Could not write linkerscript-augmented binary" << eom;
114 
115  return fail;
116 }
117 
119  compilet &compiler,
120  const std::string &elf_binary,
121  const std::string &goto_binary,
122  const cmdlinet &cmdline,
124  messaget(message_handler), compiler(compiler),
125  elf_binary(elf_binary), goto_binary(goto_binary),
126  cmdline(cmdline),
127  replacement_predicates(
128  {
129  replacement_predicatet("address of array's first member",
130  [](const exprt &expr) -> const symbol_exprt&
131  { return to_symbol_expr(expr.op0().op0()); },
132  [](const exprt &expr, const namespacet &)
133  {
134  return expr.id()==ID_address_of &&
135  expr.type().id()==ID_pointer &&
136 
137  expr.op0().id()==ID_index &&
138  expr.op0().type().id()==ID_unsignedbv &&
139 
140  expr.op0().op0().id()==ID_symbol &&
141  expr.op0().op0().type().id()==ID_array &&
142 
143  expr.op0().op1().id()==ID_constant &&
144  expr.op0().op1().type().id()==ID_signedbv;
145  }),
146  replacement_predicatet("address of array",
147  [](const exprt &expr) -> const symbol_exprt&
148  { return to_symbol_expr(expr.op0()); },
149  [](const exprt &expr, const namespacet &)
150  {
151  return expr.id()==ID_address_of &&
152  expr.type().id()==ID_pointer &&
153 
154  expr.op0().id()==ID_symbol &&
155  expr.op0().type().id()==ID_array;
156  }),
157  replacement_predicatet("address of struct",
158  [](const exprt &expr) -> const symbol_exprt&
159  { return to_symbol_expr(expr.op0()); },
160  [](const exprt &expr, const namespacet &ns)
161  {
162  return expr.id()==ID_address_of &&
163  expr.type().id()==ID_pointer &&
164 
165  expr.op0().id()==ID_symbol &&
166  ns.follow(expr.op0().type()).id()==ID_struct;
167  }),
168  replacement_predicatet("array variable",
169  [](const exprt &expr) -> const symbol_exprt&
170  { return to_symbol_expr(expr); },
171  [](const exprt &expr, const namespacet &)
172  {
173  return expr.id()==ID_symbol &&
174  expr.type().id()==ID_array;
175  }),
176  replacement_predicatet("pointer (does not need pointerizing)",
177  [](const exprt &expr) -> const symbol_exprt&
178  { return to_symbol_expr(expr); },
179  [](const exprt &expr, const namespacet &)
180  {
181  return expr.id()==ID_symbol &&
182  expr.type().id()==ID_pointer;
183  })
184  })
185 {}
186 
188  goto_functionst &goto_functions,
189  symbol_tablet &symbol_table,
190  const linker_valuest &linker_values)
191 {
192  const namespacet ns(symbol_table);
193 
194  int ret=0;
195  // First, pointerize the actual linker-defined symbols
196  for(const auto &pair : linker_values)
197  {
198  const auto maybe_symbol=symbol_table.get_writeable(pair.first);
199  if(!maybe_symbol)
200  continue;
201  symbolt &entry=*maybe_symbol;
202  entry.type=pointer_type(char_type());
203  entry.is_extern=false;
204  entry.value=pair.second.second;
205  }
206 
207  // Next, find all occurrences of linker-defined symbols that are _values_
208  // of some symbol in the symbol table, and pointerize them too
209  for(const auto &pair : symbol_table.symbols)
210  {
211  std::list<symbol_exprt> to_pointerize;
212  symbols_to_pointerize(linker_values, pair.second.value, to_pointerize);
213 
214  if(to_pointerize.empty())
215  continue;
216  debug() << "Pointerizing the symbol-table value of symbol " << pair.first
217  << eom;
218  int fail=pointerize_subexprs_of(
219  symbol_table.get_writeable_ref(pair.first).value,
220  to_pointerize,
221  linker_values,
222  ns);
223  if(to_pointerize.empty() && fail==0)
224  continue;
225  ret=1;
226  for(const auto &sym : to_pointerize)
227  error() << " Could not pointerize '" << sym.get_identifier()
228  << "' in symbol table entry " << pair.first << ". Pretty:\n"
229  << sym.pretty() << "\n";
230  error() << eom;
231  }
232 
233  // Finally, pointerize all occurrences of linker-defined symbols in the
234  // goto program
235  for(auto &gf : goto_functions.function_map)
236  {
237  goto_programt &program=gf.second.body;
239  {
240  for(exprt *insts : std::list<exprt *>({&(iit->code), &(iit->guard)}))
241  {
242  std::list<symbol_exprt> to_pointerize;
243  symbols_to_pointerize(linker_values, *insts, to_pointerize);
244  if(to_pointerize.empty())
245  continue;
246  debug() << "Pointerizing a program expression..." << eom;
247  int fail = pointerize_subexprs_of(
248  *insts, to_pointerize, linker_values, ns);
249  if(to_pointerize.empty() && fail==0)
250  continue;
251  ret=1;
252  for(const auto &sym : to_pointerize)
253  {
254  error() << " Could not pointerize '" << sym.get_identifier()
255  << "' in function " << gf.first << ". Pretty:\n"
256  << sym.pretty() << "\n";
257  error().source_location=iit->source_location;
258  }
259  error() << eom;
260  }
261  }
262  }
263  return ret;
264 }
265 
267  exprt &old_expr,
268  const linker_valuest &linker_values,
269  const symbol_exprt &old_symbol,
270  const irep_idt &ident,
271  const std::string &shape)
272 {
273  auto it=linker_values.find(ident);
274  if(it==linker_values.end())
275  {
276  error() << "Could not find a new expression for linker script-defined "
277  << "symbol '" << ident << "'" << eom;
278  return 1;
279  }
280  symbol_exprt new_expr=it->second.first;
281  new_expr.add_source_location()=old_symbol.source_location();
282  debug() << "Pointerizing linker-defined symbol '" << ident << "' of shape <"
283  << shape << ">." << eom;
284  old_expr=new_expr;
285  return 0;
286 }
287 
289  exprt &expr,
290  std::list<symbol_exprt> &to_pointerize,
291  const linker_valuest &linker_values,
292  const namespacet &ns)
293 {
294  int fail=0, tmp=0;
295  for(auto const &pair : linker_values)
296  for(auto const &pattern : replacement_predicates)
297  {
298  if(!pattern.match(expr, ns))
299  continue;
300  // take a copy, expr will be changed below
301  const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
302  if(pair.first!=inner_symbol.get_identifier())
303  continue;
304  tmp=replace_expr(expr, linker_values, inner_symbol, pair.first,
305  pattern.description());
306  fail=tmp?tmp:fail;
307  auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
308  inner_symbol);
309  if(result==to_pointerize.end())
310  {
311  fail=1;
312  error() << "Too many removals of '" << inner_symbol.get_identifier()
313  << "'" << eom;
314  }
315  else
316  to_pointerize.erase(result);
317  // If we get here, we've just pointerized this expression. That expression
318  // will be a symbol possibly wrapped in some unary junk, but won't contain
319  // other symbols as subexpressions that might need to be pointerized. So
320  // it's safe to bail out here.
321  return fail;
322  }
323 
324  for(auto &op : expr.operands())
325  {
326  tmp=pointerize_subexprs_of(op, to_pointerize, linker_values, ns);
327  fail=tmp?tmp:fail;
328  }
329  return fail;
330 }
331 
333  const linker_valuest &linker_values,
334  const exprt &expr,
335  std::list<symbol_exprt> &to_pointerize) const
336 {
337  for(const auto &op : expr.operands())
338  {
339  if(op.id()!=ID_symbol)
340  continue;
341  const symbol_exprt &sym_exp=to_symbol_expr(op);
342  const auto &pair=linker_values.find(sym_exp.get_identifier());
343  if(pair!=linker_values.end())
344  to_pointerize.push_back(sym_exp);
345  }
346  for(const auto &op : expr.operands())
347  symbols_to_pointerize(linker_values, op, to_pointerize);
348 }
349 
350 #if 0
351 The current implementation of this function is less precise than the
352  commented-out version below. To understand the difference between these
353  implementations, consider the following example:
354 
355 Suppose we have a section in the linker script, 100 bytes long, where the
356 address of the symbol sec_start is the start of the section (value 4096) and the
357 address of sec_end is the end of that section (value 4196).
358 
359 The current implementation synthesizes the goto-version of the following C:
360 
361  char __sec_array[100];
362  char *sec_start=(&__sec_array[0]);
363  char *sec_end=((&__sec_array[0])+100);
364  // Yes, it is 100 not 99. We're pointing to the end of the memory occupied
365  // by __sec_array, not the last element of __sec_array.
366 
367 This is imprecise for the following reason: the actual address of the array and
368 the pointers shall be some random CBMC-internal address, instead of being 4096
369 and 4196. The linker script, on the other hand, would have specified the exact
370 position of the section, and we even know what the actual values of sec_start
371 and sec_end are from the object file (these values are in the `addresses` list
372 of the `data` argument to this function). If the correctness of the code depends
373 on these actual values, then CBMCs model of the code is too imprecise to verify
374 this.
375 
376 The commented-out version of this function below synthesizes the following:
377 
378  char *sec_start=4096;
379  char *sec_end=4196;
380  __CPROVER_allocated_memory(4096, 100);
381 
382 This code has both the actual addresses of the start and end of the section and
383 tells CBMC that the intermediate region is valid. However, the allocated_memory
384 macro does not currently allocate an actual object at the address 4096, so
385 symbolic execution can fail. In particular, the 'allocated memory' is part of
386 __CPROVER_memory, which does not have a bounded size; this means that (for
387 example) calls to memcpy or memset fail, because the first and third arguments
388 do not have know n size. The commented-out implementation should be reinstated
389 once this limitation of __CPROVER_allocated_memory has been fixed.
390 
391 In either case, no other changes to the rest of the code (outside this function)
392 should be necessary. The rest of this file converts expressions containing the
393 linker-defined symbol into pointer types if they were not already, and this is
394 the right behaviour for both implementations.
395 #endif
397  jsont &data,
398  const std::string &linker_script,
399  goto_programt &gp,
400  symbol_tablet &symbol_table,
401  linker_valuest &linker_values)
402 #if 1
403 {
404  goto_programt::instructionst initialize_instructions=gp.instructions;
405  std::map<irep_idt, std::size_t> truncated_symbols;
406  for(auto &d : data["regions"].array)
407  {
408  bool has_end=d["has-end-symbol"].is_true();
409 
410  std::ostringstream array_name;
411  array_name << CPROVER_PREFIX << "linkerscript-array_"
412  << d["start-symbol"].value;
413  if(has_end)
414  array_name << "-" << d["end-symbol"].value;
415 
416 
417  // Array symbol_exprt
418  mp_integer array_size = string2integer(d["size"].value);
419  if(array_size > MAX_FLATTENED_ARRAY_SIZE)
420  {
421  warning() << "Object section '" << d["section"].value << "' of size "
422  << array_size << " is too large to model. Truncating to "
423  << MAX_FLATTENED_ARRAY_SIZE << " bytes" << eom;
424  array_size=MAX_FLATTENED_ARRAY_SIZE;
425  if(!has_end)
426  truncated_symbols[d["size-symbol"].value]=MAX_FLATTENED_ARRAY_SIZE;
427  }
428 
429  constant_exprt array_size_expr=from_integer(array_size, size_type());
430  array_typet array_type(char_type(), array_size_expr);
431  symbol_exprt array_expr(array_name.str(), array_type);
432  source_locationt array_loc;
433 
434  array_loc.set_file(linker_script);
435  std::ostringstream array_comment;
436  array_comment << "Object section '" << d["section"].value << "' of size "
437  << array_size << " bytes";
438  array_loc.set_comment(array_comment.str());
439  array_expr.add_source_location()=array_loc;
440 
441  // Array start address
442  index_exprt zero_idx(array_expr, from_integer(0, size_type()));
443  address_of_exprt array_start(zero_idx);
444 
445  // Linker-defined symbol_exprt pointing to start address
446  symbol_exprt start_sym(d["start-symbol"].value, pointer_type(char_type()));
447  linker_values[d["start-symbol"].value]=std::make_pair(start_sym,
448  array_start);
449 
450  // Since the value of the pointer will be a random CBMC address, write a
451  // note about the real address in the object file
452  auto it=std::find_if(data["addresses"].array.begin(),
453  data["addresses"].array.end(),
454  [&d](const jsont &add)
455  { return add["sym"].value==d["start-symbol"].value; });
456  if(it==data["addresses"].array.end())
457  {
458  error() << "Start: Could not find address corresponding to symbol '"
459  << d["start-symbol"].value << "' (start of section)" << eom;
460  return 1;
461  }
462  source_locationt start_loc;
463  start_loc.set_file(linker_script);
464  std::ostringstream start_comment;
465  start_comment << "Pointer to beginning of object section '"
466  << d["section"].value << "'. Original address in object file"
467  << " is " << (*it)["val"].value;
468  start_loc.set_comment(start_comment.str());
469  start_sym.add_source_location()=start_loc;
470 
471  // Instruction for start-address pointer in __CPROVER_initialize
472  code_assignt start_assign(start_sym, array_start);
473  start_assign.add_source_location()=start_loc;
474  goto_programt::instructiont start_assign_i;
475  start_assign_i.make_assignment(start_assign);
476  start_assign_i.source_location=start_loc;
477  initialize_instructions.push_front(start_assign_i);
478 
479  if(has_end) // Same for pointer to end of array
480  {
481  plus_exprt array_end(array_start, array_size_expr);
482 
483  symbol_exprt end_sym(d["end-symbol"].value, pointer_type(char_type()));
484  linker_values[d["end-symbol"].value]=std::make_pair(end_sym, array_end);
485 
486  auto it=std::find_if(data["addresses"].array.begin(),
487  data["addresses"].array.end(),
488  [&d](const jsont &add)
489  { return add["sym"].value==d["end-symbol"].value; });
490  if(it==data["addresses"].array.end())
491  {
492  error() << "Could not find address corresponding to symbol '"
493  << d["end-symbol"].value << "' (end of section)" << eom;
494  return 1;
495  }
496  source_locationt end_loc;
497  end_loc.set_file(linker_script);
498  std::ostringstream end_comment;
499  end_comment << "Pointer to end of object section '"
500  << d["section"].value << "'. Original address in object file"
501  << " is " << (*it)["val"].value;
502  end_loc.set_comment(end_comment.str());
503  end_sym.add_source_location()=end_loc;
504 
505  code_assignt end_assign(end_sym, array_end);
506  end_assign.add_source_location()=end_loc;
507  goto_programt::instructiont end_assign_i;
508  end_assign_i.make_assignment(end_assign);
509  end_assign_i.source_location=end_loc;
510  initialize_instructions.push_front(end_assign_i);
511  }
512 
513  // Add the array to the symbol table. We don't add the pointer(s) to the
514  // symbol table because they were already there, but declared as extern and
515  // probably with a different type. We change the externness and type in
516  // pointerize_linker_defined_symbols.
517  symbolt array_sym;
518  array_sym.name=array_name.str();
519  array_sym.pretty_name=array_name.str();
520  array_sym.is_lvalue=array_sym.is_static_lifetime=true;
521  array_sym.type=array_type;
522  array_sym.location=array_loc;
523  symbol_table.add(array_sym);
524 
525  // Push the array initialization to the front now, so that it happens before
526  // the initialization of the symbols that point to it.
527  namespacet ns(symbol_table);
528  exprt zi=zero_initializer(array_type, array_loc, ns, *message_handler);
529  code_assignt array_assign(array_expr, zi);
530  array_assign.add_source_location()=array_loc;
531  goto_programt::instructiont array_assign_i;
532  array_assign_i.make_assignment(array_assign);
533  array_assign_i.source_location=array_loc;
534  initialize_instructions.push_front(array_assign_i);
535  }
536 
537  // We've added every linker-defined symbol that marks the start or end of a
538  // region. But there might be other linker-defined symbols with some random
539  // address. These will have been declared extern too, so we need to give them
540  // a value also. Here, we give them the actual value that they have in the
541  // object file, since we're not assigning any object to them.
542  for(const auto &d : data["addresses"].array)
543  {
544  auto it=linker_values.find(irep_idt(d["sym"].value));
545  if(it!=linker_values.end())
546  // sym marks the start or end of a region; already dealt with.
547  continue;
548 
549  // Perhaps this is a size symbol for a section whose size we truncated
550  // earlier.
551  irep_idt symbol_value;
552  const auto &pair=truncated_symbols.find(d["sym"].value);
553  if(pair==truncated_symbols.end())
554  symbol_value=d["val"].value;
555  else
556  {
557  debug() << "Truncating the value of symbol " << d["sym"].value << " from "
558  << d["val"].value << " to " << MAX_FLATTENED_ARRAY_SIZE
559  << " because it corresponds to the size of a too-large section."
560  << eom;
562  }
563 
565  loc.set_file(linker_script);
566  loc.set_comment("linker script-defined symbol: char *"+
567  d["sym"].value+"="+"(char *)"+id2string(symbol_value)+"u;");
568 
569  symbol_exprt lhs(d["sym"].value, pointer_type(char_type()));
570 
571  constant_exprt rhs;
573  unsigned_int_type().get_width()));
574  rhs.type()=unsigned_int_type();
575 
576  exprt rhs_tc(rhs);
578 
579  linker_values[irep_idt(d["sym"].value)]=std::make_pair(lhs, rhs_tc);
580 
581  code_assignt assign(lhs, rhs_tc);
582  assign.add_source_location()=loc;
584  assign_i.make_assignment(assign);
585  assign_i.source_location=loc;
586  initialize_instructions.push_front(assign_i);
587  }
588  return 0;
589 }
590 #else
591 {
592  goto_programt::instructionst initialize_instructions=gp.instructions;
593  for(const auto &d : data["regions"].array)
594  {
596  const code_typet void_t({}, empty_typet());
597  f.function()=symbol_exprt(CPROVER_PREFIX "allocated_memory", void_t);
598  unsigned start=safe_string2unsigned(d["start"].value);
599  unsigned size=safe_string2unsigned(d["size"].value);
600  constant_exprt first=from_integer(start, size_type());
601  constant_exprt second=from_integer(size, size_type());
602  code_function_callt::argumentst args={first, second};
603  f.arguments()=args;
604 
606  loc.set_file(linker_script);
607  loc.set_comment("linker script-defined region:\n"+d["commt"].value+":\n"+
608  d["annot"].value);
610 
612  i.make_function_call(f);
613  initialize_instructions.push_front(i);
614  }
615 
616  if(!symbol_table.has_symbol(CPROVER_PREFIX "allocated_memory"))
617  {
618  symbolt sym;
619  sym.name=CPROVER_PREFIX "allocated_memory";
620  sym.pretty_name=CPROVER_PREFIX "allocated_memory";
621  sym.is_lvalue=sym.is_static_lifetime=true;
622  const code_typet void_t({}, empty_typet());
623  sym.type=void_t;
624  symbol_table.add(sym);
625  }
626 
627  for(const auto &d : data["addresses"].array)
628  {
630  loc.set_file(linker_script);
631  loc.set_comment("linker script-defined symbol: char *"+
632  d["sym"].value+"="+"(char *)"+d["val"].value+"u;");
633 
634  symbol_exprt lhs(d["sym"].value, pointer_type(char_type()));
635 
636  constant_exprt rhs;
637  rhs.set_value(integer2binary(string2integer(d["val"].value),
638  unsigned_int_type().get_width()));
639  rhs.type()=unsigned_int_type();
640 
641  exprt rhs_tc(rhs);
642  rhs_tc.make_typecast(pointer_type(char_type()));
643 
644  linker_values[irep_idt(d["sym"].value)]=std::make_pair(lhs, rhs_tc);
645 
646  code_assignt assign(lhs, rhs_tc);
647  assign.add_source_location()=loc;
649  assign_i.make_assignment(assign);
650  initialize_instructions.push_front(assign_i);
651  }
652  return 0;
653 }
654 #endif
655 
657  std::list<irep_idt> &linker_defined_symbols,
658  const symbol_tablet &symbol_table,
659  const std::string &out_file,
660  const std::string &def_out_file)
661 {
662  for(auto const &pair : symbol_table.symbols)
663  if(pair.second.is_extern && pair.second.value.is_nil() &&
664  pair.second.name!="__CPROVER_memory")
665  linker_defined_symbols.push_back(pair.second.name);
666 
667  std::ostringstream linker_def_str;
668  std::copy(
669  linker_defined_symbols.begin(),
670  linker_defined_symbols.end(),
671  std::ostream_iterator<irep_idt>(linker_def_str, "\n"));
672  debug() << "Linker-defined symbols: [" << linker_def_str.str() << "]\n"
673  << eom;
674 
675  temporary_filet linker_def_infile("goto-cc-linker-defs", "");
676  std::ofstream linker_def_file(linker_def_infile());
677  linker_def_file << linker_def_str.str();
678  linker_def_file.close();
679 
680  std::vector<std::string> argv=
681  {
682  "ls_parse.py",
683  "--script", cmdline.get_value('T'),
684  "--object", out_file,
685  "--sym-file", linker_def_infile(),
686  "--out-file", def_out_file
687  };
688 
689  if(get_message_handler().get_verbosity() >= messaget::M_DEBUG)
690  argv.push_back("--very-verbose");
691  else if(get_message_handler().get_verbosity() > messaget::M_RESULT)
692  argv.push_back("--verbose");
693 
694  debug() << "RUN:";
695  for(std::size_t i=0; i<argv.size(); i++)
696  debug() << " " << argv[i];
697  debug() << eom;
698 
699  int rc=run(argv[0], argv, linker_def_infile(), def_out_file);
700  if(rc!=0)
701  warning() << "Problem parsing linker script" << eom;
702 
703  return rc;
704 }
705 
707  const std::list<irep_idt> &linker_defined_symbols,
708  const linker_valuest &linker_values)
709 {
710  int fail=0;
711  for(const auto &sym : linker_defined_symbols)
712  if(linker_values.find(sym)==linker_values.end())
713  {
714  fail=1;
715  error() << "Variable '" << sym << "' was declared extern but never given "
716  << "a value, even in a linker script" << eom;
717  }
718 
719  for(const auto &pair : linker_values)
720  {
721  auto it=std::find(linker_defined_symbols.begin(),
722  linker_defined_symbols.end(), pair.first);
723  if(it==linker_defined_symbols.end())
724  {
725  fail=1;
726  error() << "Linker script-defined symbol '" << pair.first << "' was "
727  << "either defined in the C source code, not declared extern in "
728  << "the C source code, or does not appear in the C source code"
729  << eom;
730  }
731  }
732  return fail;
733 }
734 
736 {
737  return (!(data.is_object() &&
738  data.object.find("regions")!=data.object.end() &&
739  data.object.find("addresses")!=data.object.end() &&
740  data["regions"].is_array() &&
741  data["addresses"].is_array() &&
742  std::all_of(data["addresses"].array.begin(),
743  data["addresses"].array.end(),
744  [](jsont j)
745  {
746  return j.is_object() &&
747  j.object.find("val")!=j.object.end() &&
748  j.object.find("sym")!=j.object.end() &&
749  j["val"].is_number() &&
750  j["sym"].is_string();
751  }) &&
752  std::all_of(data["regions"].array.begin(),
753  data["regions"].array.end(),
754  [](jsont j)
755  {
756  return j.is_object() &&
757  j.object.find("start")!=j.object.end() &&
758  j.object.find("size")!=j.object.end() &&
759  j.object.find("annot")!=j.object.end() &&
760  j.object.find("commt")!=j.object.end() &&
761  j.object.find("start-symbol")!=j.object.end() &&
762  j.object.find("has-end-symbol")!=j.object.end() &&
763  j.object.find("section")!=j.object.end() &&
764  j["start"].is_number() &&
765  j["size"].is_number() &&
766  j["annot"].is_string() &&
767  j["start-symbol"].is_string() &&
768  j["section"].is_string() &&
769  j["commt"].is_string() &&
770  ( (j["has-end-symbol"].is_true() &&
771  j.object.find("end-symbol")!=j.object.end() &&
772  j["end-symbol"].is_string())
773  ||(j["has-end-symbol"].is_false() &&
774  j.object.find("size-symbol")!=j.object.end() &&
775  j.object.find("end-symbol")==j.object.end() &&
776  j["size-symbol"].is_string()));
777  })));
778 }
#define loc()
symbol_tablet symbol_table
Definition: language_ui.h:25
irep_idt name
The unique identifier.
Definition: symbol.h:43
BigInt mp_integer
Definition: mp_arith.h:22
Base type of functions.
Definition: std_types.h:764
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:243
const mp_integer string2integer(const std::string &n, unsigned base)
Definition: mp_arith.cpp:57
exprt & op0()
Definition: expr.h:72
#define CPROVER_PREFIX
void symbols_to_pointerize(const linker_valuest &linker_values, const exprt &expr, std::list< symbol_exprt > &to_pointerize) const
fill to_pointerize with names of linker symbols appearing in expr
unsignedbv_typet unsigned_int_type()
Definition: c_types.cpp:44
void set_comment(const irep_idt &comment)
void __CPROVER_allocated_memory(__CPROVER_size_t address, __CPROVER_size_t extent)
const irep_idt & get_identifier() const
Definition: std_expr.h:128
Definition: json.h:23
int run(const std::string &what, const std::vector< std::string > &argv, const std::string &std_input, const std::string &std_output, const std::string &std_error)
Definition: run.cpp:82
linker_script_merget(compilet &, const std::string &elf_binary, const std::string &goto_binary, const cmdlinet &, message_handlert &)
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
Read Goto Programs.
exprt value
Initial value of symbol.
Definition: symbol.h:37
std::string get_value(char option) const
Definition: cmdline.cpp:45
function_mapt function_map
irep_idt pretty_name
Language-specific display name.
Definition: symbol.h:55
typet & type()
Definition: expr.h:56
exprt::operandst argumentst
Definition: std_code.h:888
unsignedbv_typet size_type()
Definition: c_types.cpp:58
Symbol table entry.This is a symbol in the symbol table, stored in an object of type symbol_tablet...
Definition: symbol.h:30
Magic numbers used throughout the codebase.
A constant literal expression.
Definition: std_expr.h:4422
static mstreamt & eom(mstreamt &m)
Definition: message.h:272
int pointerize_linker_defined_symbols(goto_functionst &goto_functions, symbol_tablet &symbol_table, const linker_valuest &linker_values)
convert the type of linker script-defined symbols to char*
std::map< irep_idt, std::pair< symbol_exprt, exprt > > linker_valuest
void make_function_call(const codet &_code)
Definition: goto_program.h:294
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
Definition: json_parser.cpp:16
bool is_static_lifetime
Definition: symbol.h:67
mstreamt & warning() const
Definition: message.h:307
This class represents an instruction in the GOTO intermediate representation.
Definition: goto_program.h:173
virtual symbolt * get_writeable(const irep_idt &name) override
Find a symbol in the symbol table for read-write access.
Definition: symbol_table.h:87
std::list< instructiont > instructionst
Definition: goto_program.h:395
int pointerize_subexprs_of(exprt &expr, std::list< symbol_exprt > &to_pointerize, const linker_valuest &linker_values, const namespacet &ns)
Expression Initialization.
bool read_goto_binary(const std::string &filename, goto_modelt &dest, message_handlert &message_handler)
const irep_idt & id() const
Definition: irep.h:259
void set_value(const irep_idt &value)
Definition: std_expr.h:4444
bool is_false(const literalt &l)
Definition: literal.h:196
virtual bool isset(char option) const
Definition: cmdline.cpp:27
argumentst & arguments()
Definition: std_code.h:890
const std::string & elf_binary
source_locationt source_location
Definition: message.h:214
void set_file(const irep_idt &file)
The empty type.
Definition: std_types.h:54
#define INITIALIZE_FUNCTION
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:403
Patterns of expressions that should be replaced.
exprt & op1()
Definition: expr.h:75
std::list< replacement_predicatet > replacement_predicates
The "shapes" of expressions to be replaced by a pointer.
The symbol table.
Definition: symbol_table.h:19
mstreamt & error() const
Definition: message.h:302
TO_BE_DOCUMENTED.
Definition: namespace.h:74
A function call.
Definition: std_code.h:858
The plus expression.
Definition: std_expr.h:893
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast a generic exprt to a symbol_exprt.
Definition: std_expr.h:210
dstringt has one field, an unsigned integer no which is an index into a static table of strings...
Definition: dstring.h:33
Operator to return the address of an object.
Definition: std_expr.h:3168
const symbolst & symbols
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool write_object_file(const std::string &, const symbol_tablet &, goto_functionst &)
writes the goto functions in the function table to a binary format object file.
Definition: compile.cpp:581
exprt zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns, message_handlert &message_handler)
int linker_data_is_malformed(const jsont &data) const
Validate output of the scripts/ls_parse.py tool.
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:70
bool is_extern
Definition: symbol.h:68
int replace_expr(exprt &old_expr, const linker_valuest &linker_values, const symbol_exprt &old_symbol, const irep_idt &ident, const std::string &shape)
do the actual replacement of an expr with a new pointer expr
unsigned safe_string2unsigned(const std::string &str, int base)
Definition: string2int.cpp:54
typet type
Type of symbol.
Definition: symbol.h:34
source_locationt location
Source code location of definition of symbol.
Definition: symbol.h:40
message_handlert & get_message_handler()
Definition: message.h:153
mstreamt & result() const
Definition: message.h:312
const cmdlinet & cmdline
exprt & function()
Definition: std_code.h:878
source_locationt source_location
The location of the instruction in the source file.
Definition: goto_program.h:182
Base class for all expressions.
Definition: expr.h:42
int add_linker_script_definitions()
Add values of linkerscript-defined symbols to the goto-binary.
std::string to_string(const string_constraintt &expr)
Used for debug printing.
const source_locationt & source_location() const
Definition: expr.h:125
int goto_and_object_mismatch(const std::list< irep_idt > &linker_defined_symbols, const linker_valuest &linker_values)
one-to-one correspondence between extern & linker symbols
const std::string integer2binary(const mp_integer &n, std::size_t width)
Definition: mp_arith.cpp:67
source_locationt & add_source_location()
Definition: expr.h:130
#define Forall_goto_program_instructions(it, program)
Definition: goto_program.h:740
arrays with given size
Definition: std_types.h:1013
Expression to hold a symbol (variable)
Definition: std_expr.h:90
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
goto_programt coverage_criteriont message_handlert & message_handler
Definition: cover.cpp:66
dstringt irep_idt
Definition: irep.h:32
mstreamt & debug() const
Definition: message.h:332
Merge linker script-defined symbols into a goto-program.
int ls_data2instructions(jsont &data, const std::string &linker_script, goto_programt &gp, symbol_tablet &symbol_table, linker_valuest &linker_values)
Write a list of definitions derived from data into gp&#39;s instructions member.
const std::size_t MAX_FLATTENED_ARRAY_SIZE
Definition: magic.h:11
const std::string & goto_binary
operandst & operands()
Definition: expr.h:66
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
void make_typecast(const typet &_type)
Definition: expr.cpp:84
const irept & find(const irep_namet &name) const
Definition: irep.cpp:285
bitvector_typet char_type()
Definition: c_types.cpp:114
Assignment.
Definition: std_code.h:196
Definition: kdev_t.h:24
int get_linker_script_data(std::list< irep_idt > &linker_defined_symbols, const symbol_tablet &symbol_table, const std::string &out_file, const std::string &def_out_file)
Write linker script definitions to linker_data.
std::string array_name(const namespacet &ns, const exprt &expr)
Definition: array_name.cpp:19
goal_filters add(util_make_unique< internal_goals_filtert >(message_handler))
bool is_lvalue
Definition: symbol.h:68
array index operator
Definition: std_expr.h:1462
Definition: kdev_t.h:19