cprover
linker_script_merge.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Linker Script Merging
4 
5 Author: Kareem Khazem <karkhaz@karkhaz.com>, 2017
6 
7 \*******************************************************************/
8 
9 #include "linker_script_merge.h"
10 
11 #include <algorithm>
12 #include <fstream>
13 #include <iterator>
14 
15 #include <util/arith_tools.h>
16 #include <util/c_types.h>
17 #include <util/cmdline.h>
18 #include <util/expr_initializer.h>
19 #include <util/magic.h>
20 #include <util/pointer_expr.h>
21 #include <util/run.h>
22 #include <util/tempfile.h>
23 
24 #include <json/json_parser.h>
25 
27 
31 
32 #include "compile.h"
33 
35 {
36  if(!cmdline.isset('T'))
37  return 0;
38 
39  auto original_goto_model =
41 
42  if(!original_goto_model.has_value())
43  {
44  log.error() << "Unable to read goto binary for linker script merging"
45  << messaget::eom;
46  return 1;
47  }
48 
49  temporary_filet linker_def_outfile("goto-cc-linker-info", ".json");
50  std::list<irep_idt> linker_defined_symbols;
51  int fail = get_linker_script_data(
52  linker_defined_symbols,
53  original_goto_model->symbol_table,
54  elf_binary,
55  linker_def_outfile());
56  // ignore linker script parsing failures until the code is tested more widely
57  if(fail!=0)
58  return 0;
59 
60  jsont data;
61  fail = parse_json(linker_def_outfile(), log.get_message_handler(), data);
62  if(fail!=0)
63  {
64  log.error() << "Problem parsing linker script JSON data" << messaget::eom;
65  return fail;
66  }
67 
69  if(fail!=0)
70  {
71  log.error() << "Malformed linker-script JSON document" << messaget::eom;
72  data.output(log.error());
73  return fail;
74  }
75 
76  linker_valuest linker_values;
77  fail = ls_data2instructions(
78  data,
79  cmdline.get_value('T'),
80  original_goto_model->symbol_table,
81  linker_values);
82  if(fail!=0)
83  {
84  log.error() << "Could not add linkerscript defs to symbol table"
85  << messaget::eom;
86  return fail;
87  }
88  if(
89  original_goto_model->goto_functions.function_map.erase(
90  INITIALIZE_FUNCTION) != 0)
91  {
93  original_goto_model->symbol_table,
94  original_goto_model->symbol_table.lookup_ref(INITIALIZE_FUNCTION)
95  .location);
98  original_goto_model->symbol_table,
99  original_goto_model->goto_functions,
101  original_goto_model->goto_functions.update();
102  }
103 
104  fail=goto_and_object_mismatch(linker_defined_symbols, linker_values);
105  if(fail!=0)
106  return fail;
107 
108  // The keys of linker_values are exactly the elements of
109  // linker_defined_symbols, so iterate over linker_values from now on.
110 
111  fail = pointerize_linker_defined_symbols(*original_goto_model, linker_values);
112 
113  if(fail!=0)
114  {
115  log.error() << "Could not pointerize all linker-defined expressions"
116  << messaget::eom;
117  return fail;
118  }
119 
121  goto_binary,
122  *original_goto_model,
123  cmdline.isset("validate-goto-model"),
125 
126  if(fail!=0)
127  {
128  log.error() << "Could not write linkerscript-augmented binary"
129  << messaget::eom;
130  }
131 
132  return fail;
133 }
134 
136  const std::string &elf_binary,
137  const std::string &goto_binary,
138  const cmdlinet &cmdline,
139  message_handlert &message_handler)
140  : elf_binary(elf_binary),
141  goto_binary(goto_binary),
142  cmdline(cmdline),
143  log(message_handler),
144  replacement_predicates(
146  "address of array's first member",
147  [](const exprt &expr) -> const symbol_exprt & {
148  return to_symbol_expr(
149  to_index_expr(to_address_of_expr(expr).object()).index());
150  },
151  [](const exprt &expr) {
152  return expr.id() == ID_address_of &&
153  expr.type().id() == ID_pointer &&
154 
155  to_address_of_expr(expr).object().id() == ID_index &&
156  to_address_of_expr(expr).object().type().id() ==
157  ID_unsignedbv &&
158 
159  to_index_expr(to_address_of_expr(expr).object())
160  .array()
161  .id() == ID_symbol &&
162  to_index_expr(to_address_of_expr(expr).object())
163  .array()
164  .type()
165  .id() == ID_array &&
166 
167  to_index_expr(to_address_of_expr(expr).object())
168  .index()
169  .id() == ID_constant &&
170  to_index_expr(to_address_of_expr(expr).object())
171  .index()
172  .type()
173  .id() == ID_signedbv;
174  }),
176  "address of array",
177  [](const exprt &expr) -> const symbol_exprt & {
178  return to_symbol_expr(to_address_of_expr(expr).object());
179  },
180  [](const exprt &expr) {
181  return expr.id() == ID_address_of &&
182  expr.type().id() == ID_pointer &&
183 
184  to_address_of_expr(expr).object().id() == ID_symbol &&
185  to_address_of_expr(expr).object().type().id() == ID_array;
186  }),
188  "address of struct",
189  [](const exprt &expr) -> const symbol_exprt & {
190  return to_symbol_expr(to_address_of_expr(expr).object());
191  },
192  [](const exprt &expr) {
193  return expr.id() == ID_address_of &&
194  expr.type().id() == ID_pointer &&
195 
196  to_address_of_expr(expr).object().id() == ID_symbol &&
197  (to_address_of_expr(expr).object().type().id() == ID_struct ||
198  to_address_of_expr(expr).object().type().id() ==
199  ID_struct_tag);
200  }),
202  "array variable",
203  [](const exprt &expr) -> const symbol_exprt & {
204  return to_symbol_expr(expr);
205  },
206  [](const exprt &expr) {
207  return expr.id() == ID_symbol && expr.type().id() == ID_array;
208  }),
210  "pointer (does not need pointerizing)",
211  [](const exprt &expr) -> const symbol_exprt & {
212  return to_symbol_expr(expr);
213  },
214  [](const exprt &expr) {
215  return expr.id() == ID_symbol && expr.type().id() == ID_pointer;
216  })})
217 {}
218 
220  goto_modelt &goto_model,
221  const linker_valuest &linker_values)
222 {
223  const namespacet ns(goto_model.symbol_table);
224 
225  int ret=0;
226 
227  // Next, find all occurrences of linker-defined symbols that are _values_
228  // of some symbol in the symbol table, and pointerize them too
229  for(const auto &pair : goto_model.symbol_table.symbols)
230  {
231  std::list<symbol_exprt> to_pointerize;
232  symbols_to_pointerize(linker_values, pair.second.value, to_pointerize);
233 
234  if(to_pointerize.empty())
235  continue;
236  log.debug() << "Pointerizing the symbol-table value of symbol "
237  << pair.first << messaget::eom;
238  int fail = pointerize_subexprs_of(
239  goto_model.symbol_table.get_writeable_ref(pair.first).value,
240  to_pointerize,
241  linker_values);
242  if(to_pointerize.empty() && fail==0)
243  continue;
244  ret=1;
245  for(const auto &sym : to_pointerize)
246  {
247  log.error() << " Could not pointerize '" << sym.get_identifier()
248  << "' in symbol table entry " << pair.first << ". Pretty:\n"
249  << sym.pretty() << "\n";
250  }
251  log.error() << messaget::eom;
252  }
253 
254  // Finally, pointerize all occurrences of linker-defined symbols in the
255  // goto program
256  for(auto &gf : goto_model.goto_functions.function_map)
257  {
258  goto_programt &program=gf.second.body;
259  for(auto &instruction : program.instructions)
260  {
261  for(exprt *insts : std::list<exprt *>(
262  {&(instruction.code_nonconst()), &(instruction.guard)}))
263  {
264  std::list<symbol_exprt> to_pointerize;
265  symbols_to_pointerize(linker_values, *insts, to_pointerize);
266  if(to_pointerize.empty())
267  continue;
268  log.debug() << "Pointerizing a program expression..." << messaget::eom;
269  int fail = pointerize_subexprs_of(*insts, to_pointerize, linker_values);
270  if(to_pointerize.empty() && fail==0)
271  continue;
272  ret=1;
273  for(const auto &sym : to_pointerize)
274  {
275  log.error() << " Could not pointerize '" << sym.get_identifier()
276  << "' in function " << gf.first << ". Pretty:\n"
277  << sym.pretty() << "\n";
278  log.error().source_location = instruction.source_location();
279  }
280  log.error() << messaget::eom;
281  }
282  }
283  }
284  return ret;
285 }
286 
288  exprt &old_expr,
289  const linker_valuest &linker_values,
290  const symbol_exprt &old_symbol,
291  const irep_idt &ident,
292  const std::string &shape)
293 {
294  auto it=linker_values.find(ident);
295  if(it==linker_values.end())
296  {
297  log.error() << "Could not find a new expression for linker script-defined "
298  << "symbol '" << ident << "'" << messaget::eom;
299  return 1;
300  }
301  symbol_exprt new_expr=it->second.first;
302  new_expr.add_source_location()=old_symbol.source_location();
303  log.debug() << "Pointerizing linker-defined symbol '" << ident
304  << "' of shape <" << shape << ">." << messaget::eom;
305  old_expr=new_expr;
306  return 0;
307 }
308 
310  exprt &expr,
311  std::list<symbol_exprt> &to_pointerize,
312  const linker_valuest &linker_values)
313 {
314  int fail=0, tmp=0;
315  for(auto const &pair : linker_values)
316  for(auto const &pattern : replacement_predicates)
317  {
318  if(!pattern.match(expr))
319  continue;
320  // take a copy, expr will be changed below
321  const symbol_exprt inner_symbol=pattern.inner_symbol(expr);
322  if(pair.first!=inner_symbol.get_identifier())
323  continue;
324  tmp=replace_expr(expr, linker_values, inner_symbol, pair.first,
325  pattern.description());
326  fail=tmp?tmp:fail;
327  auto result=std::find(to_pointerize.begin(), to_pointerize.end(),
328  inner_symbol);
329  if(result==to_pointerize.end())
330  {
331  fail=1;
332  log.error() << "Too many removals of '" << inner_symbol.get_identifier()
333  << "'" << messaget::eom;
334  }
335  else
336  to_pointerize.erase(result);
337  // If we get here, we've just pointerized this expression. That expression
338  // will be a symbol possibly wrapped in some unary junk, but won't contain
339  // other symbols as subexpressions that might need to be pointerized. So
340  // it's safe to bail out here.
341  return fail;
342  }
343 
344  for(auto &op : expr.operands())
345  {
346  tmp = pointerize_subexprs_of(op, to_pointerize, linker_values);
347  fail=tmp?tmp:fail;
348  }
349  return fail;
350 }
351 
353  const linker_valuest &linker_values,
354  const exprt &expr,
355  std::list<symbol_exprt> &to_pointerize) const
356 {
357  for(const auto &op : expr.operands())
358  {
359  if(op.id()!=ID_symbol)
360  continue;
361  const symbol_exprt &sym_exp=to_symbol_expr(op);
362  const auto &pair=linker_values.find(sym_exp.get_identifier());
363  if(pair!=linker_values.end())
364  to_pointerize.push_back(sym_exp);
365  }
366  for(const auto &op : expr.operands())
367  symbols_to_pointerize(linker_values, op, to_pointerize);
368 }
369 
370 #if 0
371 The current implementation of this function is less precise than the
372  commented-out version below. To understand the difference between these
373  implementations, consider the following example:
374 
375 Suppose we have a section in the linker script, 100 bytes long, where the
376 address of the symbol sec_start is the start of the section (value 4096) and the
377 address of sec_end is the end of that section (value 4196).
378 
379 The current implementation synthesizes the goto-version of the following C:
380 
381  char __sec_array[100];
382  char *sec_start=(&__sec_array[0]);
383  char *sec_end=((&__sec_array[0])+100);
384  // Yes, it is 100 not 99. We're pointing to the end of the memory occupied
385  // by __sec_array, not the last element of __sec_array.
386 
387 This is imprecise for the following reason: the actual address of the array and
388 the pointers shall be some random CBMC-internal address, instead of being 4096
389 and 4196. The linker script, on the other hand, would have specified the exact
390 position of the section, and we even know what the actual values of sec_start
391 and sec_end are from the object file (these values are in the `addresses` list
392 of the `data` argument to this function). If the correctness of the code depends
393 on these actual values, then CBMCs model of the code is too imprecise to verify
394 this.
395 
396 The commented-out version of this function below synthesizes the following:
397 
398  char *sec_start=4096;
399  char *sec_end=4196;
400  __CPROVER_allocated_memory(4096, 100);
401 
402 This code has both the actual addresses of the start and end of the section and
403 tells CBMC that the intermediate region is valid. However, the allocated_memory
404 macro does not currently allocate an actual object at the address 4096, so
405 symbolic execution can fail. In particular, the 'allocated memory' is part of
406 __CPROVER_memory, which does not have a bounded size; this means that (for
407 example) calls to memcpy or memset fail, because the first and third arguments
408 do not have know n size. The commented-out implementation should be reinstated
409 once this limitation of __CPROVER_allocated_memory has been fixed.
410 
411 In either case, no other changes to the rest of the code (outside this function)
412 should be necessary. The rest of this file converts expressions containing the
413 linker-defined symbol into pointer types if they were not already, and this is
414 the right behaviour for both implementations.
415 #endif
417  jsont &data,
418  const std::string &linker_script,
419  symbol_tablet &symbol_table,
420  linker_valuest &linker_values)
421 #if 1
422 {
423  std::map<irep_idt, std::size_t> truncated_symbols;
424  for(auto &d : to_json_array(data["regions"]))
425  {
426  bool has_end=d["has-end-symbol"].is_true();
427 
428  std::ostringstream array_name;
429  array_name << CPROVER_PREFIX << "linkerscript-array_"
430  << d["start-symbol"].value;
431  if(has_end)
432  array_name << "-" << d["end-symbol"].value;
433 
434 
435  // Array symbol_exprt
436  mp_integer array_size = string2integer(d["size"].value);
437  if(array_size > MAX_FLATTENED_ARRAY_SIZE)
438  {
439  log.warning() << "Object section '" << d["section"].value << "' of size "
440  << array_size << " is too large to model. Truncating to "
441  << MAX_FLATTENED_ARRAY_SIZE << " bytes" << messaget::eom;
442  array_size=MAX_FLATTENED_ARRAY_SIZE;
443  if(!has_end)
444  truncated_symbols[d["size-symbol"].value]=MAX_FLATTENED_ARRAY_SIZE;
445  }
446 
447  constant_exprt array_size_expr=from_integer(array_size, size_type());
448  array_typet array_type(char_type(), array_size_expr);
449 
450  source_locationt array_loc;
451  array_loc.set_file(linker_script);
452  std::ostringstream array_comment;
453  array_comment << "Object section '" << d["section"].value << "' of size "
454  << array_size << " bytes";
455  array_loc.set_comment(array_comment.str());
456 
457  namespacet ns(symbol_table);
458  const auto zi = zero_initializer(array_type, array_loc, ns);
459  CHECK_RETURN(zi.has_value());
460 
461  // Add the array to the symbol table.
462  symbolt array_sym;
463  array_sym.is_static_lifetime = true;
464  array_sym.is_lvalue = true;
465  array_sym.is_state_var = true;
466  array_sym.name = array_name.str();
467  array_sym.type = array_type;
468  array_sym.value = *zi;
469  array_sym.location = array_loc;
470 
471  bool failed = symbol_table.add(array_sym);
473 
474  // Array start address
475  index_exprt zero_idx{array_sym.symbol_expr(), from_integer(0, size_type())};
476  address_of_exprt array_start(zero_idx);
477 
478  // Linker-defined symbol_exprt pointing to start address
479  symbolt start_sym;
480  start_sym.is_static_lifetime = true;
481  start_sym.is_lvalue = true;
482  start_sym.is_state_var = true;
483  start_sym.name = d["start-symbol"].value;
484  start_sym.type = pointer_type(char_type());
485  start_sym.value = array_start;
486  linker_values.emplace(
487  d["start-symbol"].value,
488  std::make_pair(start_sym.symbol_expr(), array_start));
489 
490  // Since the value of the pointer will be a random CBMC address, write a
491  // note about the real address in the object file
492  auto it = std::find_if(
493  to_json_array(data["addresses"]).begin(),
494  to_json_array(data["addresses"]).end(),
495  [&d](const jsont &add) {
496  return add["sym"].value == d["start-symbol"].value;
497  });
498  if(it == to_json_array(data["addresses"]).end())
499  {
500  log.error() << "Start: Could not find address corresponding to symbol '"
501  << d["start-symbol"].value << "' (start of section)"
502  << messaget::eom;
503  return 1;
504  }
505  source_locationt start_loc;
506  start_loc.set_file(linker_script);
507  std::ostringstream start_comment;
508  start_comment << "Pointer to beginning of object section '"
509  << d["section"].value << "'. Original address in object file"
510  << " is " << (*it)["val"].value;
511  start_loc.set_comment(start_comment.str());
512  start_sym.location = start_loc;
513 
514  auto start_sym_entry = symbol_table.insert(start_sym);
515  if(!start_sym_entry.second)
516  start_sym_entry.first = start_sym;
517 
518  if(has_end) // Same for pointer to end of array
519  {
520  plus_exprt array_end(array_start, array_size_expr);
521 
522  symbolt end_sym;
523  end_sym.is_static_lifetime = true;
524  end_sym.is_lvalue = true;
525  end_sym.is_state_var = true;
526  end_sym.name = d["end-symbol"].value;
527  end_sym.type = pointer_type(char_type());
528  end_sym.value = array_end;
529  linker_values.emplace(
530  d["end-symbol"].value,
531  std::make_pair(end_sym.symbol_expr(), array_end));
532 
533  auto entry = std::find_if(
534  to_json_array(data["addresses"]).begin(),
535  to_json_array(data["addresses"]).end(),
536  [&d](const jsont &add) {
537  return add["sym"].value == d["end-symbol"].value;
538  });
539  if(entry == to_json_array(data["addresses"]).end())
540  {
541  log.debug() << "Could not find address corresponding to symbol '"
542  << d["end-symbol"].value << "' (end of section)"
543  << messaget::eom;
544  return 1;
545  }
546  source_locationt end_loc;
547  end_loc.set_file(linker_script);
548  std::ostringstream end_comment;
549  end_comment << "Pointer to end of object section '" << d["section"].value
550  << "'. Original address in object file"
551  << " is " << (*entry)["val"].value;
552  end_loc.set_comment(end_comment.str());
553  end_sym.location = end_loc;
554 
555  auto end_sym_entry = symbol_table.insert(end_sym);
556  if(!end_sym_entry.second)
557  end_sym_entry.first = end_sym;
558  }
559  }
560 
561  // We've added every linker-defined symbol that marks the start or end of a
562  // region. But there might be other linker-defined symbols with some random
563  // address. These will have been declared extern too, so we need to give them
564  // a value also. Here, we give them the actual value that they have in the
565  // object file, since we're not assigning any object to them.
566  for(const auto &d : to_json_array(data["addresses"]))
567  {
568  auto it=linker_values.find(irep_idt(d["sym"].value));
569  if(it!=linker_values.end())
570  // sym marks the start or end of a region; already dealt with.
571  continue;
572 
573  // Perhaps this is a size symbol for a section whose size we truncated
574  // earlier.
575  irep_idt symbol_value;
576  const auto &pair=truncated_symbols.find(d["sym"].value);
577  if(pair==truncated_symbols.end())
578  symbol_value=d["val"].value;
579  else
580  {
581  log.debug()
582  << "Truncating the value of symbol " << d["sym"].value << " from "
583  << d["val"].value << " to " << MAX_FLATTENED_ARRAY_SIZE
584  << " because it corresponds to the size of a too-large section."
585  << messaget::eom;
587  }
588 
589  source_locationt loc;
590  loc.set_file(linker_script);
591  loc.set_comment("linker script-defined symbol: char *"+
592  d["sym"].value+"="+"(char *)"+id2string(symbol_value)+"u;");
593 
594  constant_exprt rhs(
596  string2integer(id2string(symbol_value)),
597  unsigned_int_type().get_width()),
599 
600  typecast_exprt rhs_tc(rhs, pointer_type(char_type()));
601 
602  symbolt &symbol = symbol_table.get_writeable_ref(d["sym"].value);
603  symbol.is_extern = false;
604  symbol.is_static_lifetime = true;
605  symbol.location = loc;
606  symbol.type = pointer_type(char_type());
607  symbol.value = rhs_tc;
608 
609  linker_values.emplace(
610  irep_idt(d["sym"].value), std::make_pair(symbol.symbol_expr(), rhs_tc));
611  }
612  return 0;
613 }
614 #else
615 {
616  goto_programt::instructionst initialize_instructions=gp.instructions;
617  for(const auto &d : to_json_array(data["regions"]))
618  {
619  unsigned start=safe_string2unsigned(d["start"].value);
620  unsigned size=safe_string2unsigned(d["size"].value);
621  constant_exprt first=from_integer(start, size_type());
622  constant_exprt second=from_integer(size, size_type());
623  const code_typet void_t({}, empty_typet());
625  symbol_exprt(CPROVER_PREFIX "allocated_memory", void_t), {first, second});
626 
627  source_locationt loc;
628  loc.set_file(linker_script);
629  loc.set_comment("linker script-defined region:\n"+d["commt"].value+":\n"+
630  d["annot"].value);
631  f.add_source_location()=loc;
632 
634  i.make_function_call(f);
635  initialize_instructions.push_front(i);
636  }
637 
638  if(!symbol_table.has_symbol(CPROVER_PREFIX "allocated_memory"))
639  {
640  symbolt sym;
641  sym.name=CPROVER_PREFIX "allocated_memory";
642  sym.pretty_name=CPROVER_PREFIX "allocated_memory";
643  sym.is_lvalue=sym.is_static_lifetime=true;
644  const code_typet void_t({}, empty_typet());
645  sym.type=void_t;
646  symbol_table.add(sym);
647  }
648 
649  for(const auto &d : to_json_array(data["addresses"]))
650  {
651  source_locationt loc;
652  loc.set_file(linker_script);
653  loc.set_comment("linker script-defined symbol: char *"+
654  d["sym"].value+"="+"(char *)"+d["val"].value+"u;");
655 
656  symbol_exprt lhs(d["sym"].value, pointer_type(char_type()));
657 
658  constant_exprt rhs;
660  string2integer(d["val"].value), unsigned_int_type().get_width()));
661  rhs.type()=unsigned_int_type();
662 
663  exprt rhs_tc =
665 
666  linker_values.emplace(
667  irep_idt(d["sym"].value), std::make_pair(lhs, rhs_tc));
668 
669  code_assignt assign(lhs, rhs_tc);
670  assign.add_source_location()=loc;
672  assign_i.make_assignment(assign);
673  initialize_instructions.push_front(assign_i);
674  }
675  return 0;
676 }
677 #endif
678 
680  std::list<irep_idt> &linker_defined_symbols,
681  const symbol_tablet &symbol_table,
682  const std::string &out_file,
683  const std::string &def_out_file)
684 {
685  for(auto const &pair : symbol_table.symbols)
686  {
687  if(
688  pair.second.is_extern && pair.second.value.is_nil() &&
689  pair.second.name != CPROVER_PREFIX "memory")
690  {
691  linker_defined_symbols.push_back(pair.second.name);
692  }
693  }
694 
695  std::ostringstream linker_def_str;
696  std::copy(
697  linker_defined_symbols.begin(),
698  linker_defined_symbols.end(),
699  std::ostream_iterator<irep_idt>(linker_def_str, "\n"));
700  log.debug() << "Linker-defined symbols: [" << linker_def_str.str() << "]\n"
701  << messaget::eom;
702 
703  temporary_filet linker_def_infile("goto-cc-linker-defs", "");
704  std::ofstream linker_def_file(linker_def_infile());
705  linker_def_file << linker_def_str.str();
706  linker_def_file.close();
707 
708  std::vector<std::string> argv=
709  {
710  "ls_parse.py",
711  "--script", cmdline.get_value('T'),
712  "--object", out_file,
713  "--sym-file", linker_def_infile(),
714  "--out-file", def_out_file
715  };
716 
718  argv.push_back("--very-verbose");
720  argv.push_back("--verbose");
721 
722  log.debug() << "RUN:";
723  for(std::size_t i=0; i<argv.size(); i++)
724  log.debug() << " " << argv[i];
725  log.debug() << messaget::eom;
726 
727  int rc = run(argv[0], argv, linker_def_infile(), def_out_file, "");
728  if(rc!=0)
729  log.warning() << "Problem parsing linker script" << messaget::eom;
730 
731  return rc;
732 }
733 
735  const std::list<irep_idt> &linker_defined_symbols,
736  const linker_valuest &linker_values)
737 {
738  int fail=0;
739  for(const auto &sym : linker_defined_symbols)
740  if(linker_values.find(sym)==linker_values.end())
741  {
742  fail=1;
743  log.error() << "Variable '" << sym
744  << "' was declared extern but never given "
745  << "a value, even in a linker script" << messaget::eom;
746  }
747 
748  for(const auto &pair : linker_values)
749  {
750  auto it=std::find(linker_defined_symbols.begin(),
751  linker_defined_symbols.end(), pair.first);
752  if(it==linker_defined_symbols.end())
753  {
754  fail=1;
755  log.error()
756  << "Linker script-defined symbol '" << pair.first << "' was "
757  << "either defined in the C source code, not declared extern in "
758  << "the C source code, or does not appear in the C source code"
759  << messaget::eom;
760  }
761  }
762  return fail;
763 }
764 
766 {
767  if(!data.is_object())
768  return true;
769 
770  const json_objectt &data_object = to_json_object(data);
771  return (
772  !(data_object.find("regions") != data_object.end() &&
773  data_object.find("addresses") != data_object.end() &&
774  data["regions"].is_array() && data["addresses"].is_array() &&
775  std::all_of(
776  to_json_array(data["addresses"]).begin(),
777  to_json_array(data["addresses"]).end(),
778  [](const jsont &j) {
779  if(!j.is_object())
780  return false;
781 
782  const json_objectt &address = to_json_object(j);
783  return address.find("val") != address.end() &&
784  address.find("sym") != address.end() &&
785  address["val"].is_number() && address["sym"].is_string();
786  }) &&
787  std::all_of(
788  to_json_array(data["regions"]).begin(),
789  to_json_array(data["regions"]).end(),
790  [](const jsont &j) {
791  if(!j.is_object())
792  return false;
793 
794  const json_objectt &region = to_json_object(j);
795  return region.find("start") != region.end() &&
796  region.find("size") != region.end() &&
797  region.find("annot") != region.end() &&
798  region.find("commt") != region.end() &&
799  region.find("start-symbol") != region.end() &&
800  region.find("has-end-symbol") != region.end() &&
801  region.find("section") != region.end() &&
802  region["start"].is_number() && region["size"].is_number() &&
803  region["annot"].is_string() &&
804  region["start-symbol"].is_string() &&
805  region["section"].is_string() && region["commt"].is_string() &&
806  ((region["has-end-symbol"].is_true() &&
807  region.find("end-symbol") != region.end() &&
808  region["end-symbol"].is_string()) ||
809  (region["has-end-symbol"].is_false() &&
810  region.find("size-symbol") != region.end() &&
811  region.find("end-symbol") == region.end() &&
812  region["size-symbol"].is_string()));
813  })));
814 }
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
Definition: arith_tools.cpp:99
irep_idt integer2bvrep(const mp_integer &src, std::size_t width)
convert an integer to bit-vector representation with given width This uses two's complement for negat...
std::string array_name(const namespacet &ns, const exprt &expr)
Definition: array_name.cpp:19
unsignedbv_typet unsigned_int_type()
Definition: c_types.cpp:54
unsignedbv_typet size_type()
Definition: c_types.cpp:68
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:253
bitvector_typet char_type()
Definition: c_types.cpp:124
Operator to return the address of an object.
Definition: pointer_expr.h:361
exprt & object()
Definition: pointer_expr.h:370
Arrays with given size.
Definition: std_types.h:763
std::string get_value(char option) const
Definition: cmdline.cpp:48
virtual bool isset(char option) const
Definition: cmdline.cpp:30
A codet representing an assignment in the program.
codet representation of a function call statement.
Base type of functions.
Definition: std_types.h:539
static bool write_bin_object_file(const std::string &file_name, const goto_modelt &src_goto_model, bool validate_goto_model, message_handlert &message_handler)
Writes the goto functions of src_goto_model to a binary format object file.
Definition: compile.cpp:574
A constant literal expression.
Definition: std_expr.h:2807
void set_value(const irep_idt &value)
Definition: std_expr.h:2820
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition: dstring.h:37
The empty type.
Definition: std_types.h:51
Base class for all expressions.
Definition: expr.h:54
source_locationt & add_source_location()
Definition: expr.h:235
const source_locationt & source_location() const
Definition: expr.h:230
typet & type()
Return the type of the expression.
Definition: expr.h:82
operandst & operands()
Definition: expr.h:92
function_mapt function_map
symbol_tablet symbol_table
Symbol table.
Definition: goto_model.h:30
goto_functionst goto_functions
GOTO functions.
Definition: goto_model.h:33
This class represents an instruction in the GOTO intermediate representation.
Definition: goto_program.h:180
A generic container class for the GOTO intermediate representation of one function.
Definition: goto_program.h:73
instructionst instructions
The list of instructions in the goto program.
Definition: goto_program.h:598
std::list< instructiont > instructionst
Definition: goto_program.h:590
Array index operator.
Definition: std_expr.h:1328
exprt & array()
Definition: std_expr.h:1353
exprt & index()
Definition: std_expr.h:1363
const irep_idt & id() const
Definition: irep.h:396
iterator find(const std::string &key)
Definition: json.h:356
iterator end()
Definition: json.h:386
Definition: json.h:27
std::string value
Definition: json.h:132
bool is_object() const
Definition: json.h:56
linker_script_merget(const std::string &elf_binary, const std::string &goto_binary, const cmdlinet &, message_handlert &)
int pointerize_linker_defined_symbols(goto_modelt &, const linker_valuest &)
convert the type of linker script-defined symbols to char*
void symbols_to_pointerize(const linker_valuest &linker_values, const exprt &expr, std::list< symbol_exprt > &to_pointerize) const
fill to_pointerize with names of linker symbols appearing in expr
int add_linker_script_definitions()
Add values of linkerscript-defined symbols to the goto-binary.
const std::string & elf_binary
int get_linker_script_data(std::list< irep_idt > &linker_defined_symbols, const symbol_tablet &symbol_table, const std::string &out_file, const std::string &def_out_file)
Write linker script definitions to linker_data.
std::map< irep_idt, std::pair< symbol_exprt, exprt > > linker_valuest
int ls_data2instructions(jsont &data, const std::string &linker_script, symbol_tablet &symbol_table, linker_valuest &linker_values)
Write a list of definitions derived from data into the symbol_table.
int pointerize_subexprs_of(exprt &expr, std::list< symbol_exprt > &to_pointerize, const linker_valuest &linker_values)
const cmdlinet & cmdline
std::list< replacement_predicatet > replacement_predicates
The "shapes" of expressions to be replaced by a pointer.
int replace_expr(exprt &old_expr, const linker_valuest &linker_values, const symbol_exprt &old_symbol, const irep_idt &ident, const std::string &shape)
do the actual replacement of an expr with a new pointer expr
int goto_and_object_mismatch(const std::list< irep_idt > &linker_defined_symbols, const linker_valuest &linker_values)
one-to-one correspondence between extern & linker symbols
int linker_data_is_malformed(const jsont &data) const
Validate output of the scripts/ls_parse.py tool.
const std::string & goto_binary
unsigned get_verbosity() const
Definition: message.h:54
source_locationt source_location
Definition: message.h:247
mstreamt & error() const
Definition: message.h:399
mstreamt & warning() const
Definition: message.h:404
mstreamt & debug() const
Definition: message.h:429
message_handlert & get_message_handler()
Definition: message.h:184
@ M_DEBUG
Definition: message.h:171
@ M_RESULT
Definition: message.h:170
static eomt eom
Definition: message.h:297
A namespacet is essentially one or two symbol tables bound together, to allow for symbol lookups in t...
Definition: namespace.h:91
The plus expression Associativity is not specified.
Definition: std_expr.h:914
Patterns of expressions that should be replaced.
void set_comment(const irep_idt &comment)
void set_file(const irep_idt &file)
Expression to hold a symbol (variable)
Definition: std_expr.h:80
const irep_idt & get_identifier() const
Definition: std_expr.h:109
symbolt & get_writeable_ref(const irep_idt &name)
Find a symbol in the symbol table for read-write access.
const symbolst & symbols
Read-only field, used to look up symbols given their names.
bool has_symbol(const irep_idt &name) const
Check whether a symbol exists in the symbol table.
bool add(const symbolt &symbol)
Add a new symbol to the symbol table.
The symbol table.
Definition: symbol_table.h:14
virtual std::pair< symbolt &, bool > insert(symbolt symbol) override
Author: Diffblue Ltd.
Symbol table entry.
Definition: symbol.h:28
bool is_extern
Definition: symbol.h:66
bool is_static_lifetime
Definition: symbol.h:65
source_locationt location
Source code location of definition of symbol.
Definition: symbol.h:37
bool is_state_var
Definition: symbol.h:62
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition: symbol.cpp:121
typet type
Type of symbol.
Definition: symbol.h:31
irep_idt name
The unique identifier.
Definition: symbol.h:40
irep_idt pretty_name
Language-specific display name.
Definition: symbol.h:52
bool is_lvalue
Definition: symbol.h:66
exprt value
Initial value of symbol.
Definition: symbol.h:34
Semantic type conversion.
Definition: std_expr.h:1920
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition: std_expr.h:1928
Compile and link source and object files.
void __CPROVER_allocated_memory(__CPROVER_size_t address, __CPROVER_size_t extent)
#define CPROVER_PREFIX
optionalt< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
Expression Initialization.
void goto_convert(const codet &code, symbol_table_baset &symbol_table, goto_programt &dest, message_handlert &message_handler, const irep_idt &mode)
Goto Programs with Functions.
Symbol Table + CFG.
dstringt irep_idt
Definition: irep.h:37
const std::string & id2string(const irep_idt &d)
Definition: irep.h:47
json_arrayt & to_json_array(jsont &json)
Definition: json.h:426
json_objectt & to_json_object(jsont &json)
Definition: json.h:444
bool parse_json(std::istream &in, const std::string &filename, message_handlert &message_handler, jsont &dest)
Definition: json_parser.cpp:16
Merge linker script-defined symbols into a goto-program.
Magic numbers used throughout the codebase.
const std::size_t MAX_FLATTENED_ARRAY_SIZE
Definition: magic.h:11
const mp_integer string2integer(const std::string &n, unsigned base)
Definition: mp_arith.cpp:54
API to expression classes for Pointers.
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
Definition: pointer_expr.h:398
static bool read_goto_binary(const std::string &filename, symbol_tablet &, goto_functionst &, message_handlert &)
Read a goto binary from a file, but do not update config.
Read Goto Programs.
int run(const std::string &what, const std::vector< std::string > &argv)
Definition: run.cpp:48
BigInt mp_integer
Definition: smt_terms.h:12
#define CHECK_RETURN(CONDITION)
Definition: invariant.h:495
static optionalt< codet > static_lifetime_init(const irep_idt &identifier, symbol_tablet &symbol_table)
#define INITIALIZE_FUNCTION
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition: std_expr.h:189
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
Definition: std_expr.h:1391
unsigned safe_string2unsigned(const std::string &str, int base)
Definition: string2int.cpp:16
std::string to_string(const string_not_contains_constraintt &expr)
Used for debug printing.
Definition: kdev_t.h:24
Definition: kdev_t.h:19
static bool failed(bool error_indicator)