cprover
string_constraint_generator_transformation.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Generates string constraints for string transformations,
4  that is, functions taking one string and returning another
5 
6 Author: Romain Brenguier, romain.brenguier@diffblue.com
7 
8 \*******************************************************************/
9 
13 
16 #include <util/arith_tools.h>
17 
37 {
38  PRECONDITION(f.arguments().size() == 4);
39  const array_string_exprt res =
42  const exprt &k = f.arguments()[3];
43  const typet &index_type = s1.length().type();
44  const typet &char_type = s1.content().type().subtype();
45 
46  // We add axioms:
47  // a1 : |res|=k
48  // a2 : forall i< min(|s1|, k) .res[i] = s1[i]
49  // a3 : forall |s1| <= i < |res|. res[i] = 0
50 
51  lemmas.push_back(res.axiom_for_has_length(k));
52 
53  const symbol_exprt idx = fresh_univ_index("QA_index_set_length", index_type);
54  const string_constraintt a2(
55  idx,
56  zero_if_negative(minimum(s1.length(), k)),
57  equal_exprt(s1[idx], res[idx]));
58  constraints.push_back(a2);
59 
60  symbol_exprt idx2 = fresh_univ_index("QA_index_set_length2", index_type);
62  idx2,
63  zero_if_negative(s1.length()),
64  zero_if_negative(res.length()),
65  equal_exprt(res[idx2], constant_char(0, char_type)));
66  constraints.push_back(a3);
67 
68  return from_integer(0, signedbv_typet(32));
69 }
70 
78 // NOLINTNEXTLINE
92 {
94  PRECONDITION(args.size() == 4 || args.size() == 5);
95  const array_string_exprt str = get_string_expr(args[2]);
96  const array_string_exprt res = char_array_of_pointer(args[1], args[0]);
97  const exprt &i = args[3];
98  const exprt j = args.size() == 5 ? args[4] : str.length();
99  return add_axioms_for_substring(res, str, i, j);
100 }
101 
117  const array_string_exprt &res,
118  const array_string_exprt &str,
119  const exprt &start,
120  const exprt &end)
121 {
122  const typet &index_type = str.length().type();
123  PRECONDITION(start.type()==index_type);
124  PRECONDITION(end.type()==index_type);
125 
126  const exprt start1 = maximum(start, from_integer(0, start.type()));
127  const exprt end1 = maximum(minimum(end, str.length()), start1);
128 
129  // Axiom 1.
130  lemmas.push_back(equal_exprt(res.length(), minus_exprt(end1, start1)));
131 
132  // Axiom 2.
133  constraints.push_back([&] {
134  const symbol_exprt idx = fresh_univ_index("QA_index_substring", index_type);
135  return string_constraintt(
136  idx,
137  zero_if_negative(res.length()),
138  equal_exprt(res[idx], str[plus_exprt(start1, idx)]));
139  }());
140 
141  return from_integer(0, signedbv_typet(32));
142 }
143 
172 {
173  PRECONDITION(f.arguments().size() == 3);
174  const array_string_exprt &str = get_string_expr(f.arguments()[2]);
175  const array_string_exprt &res =
176  char_array_of_pointer(f.arguments()[1], f.arguments()[0]);
177  const typet &index_type = str.length().type();
178  const typet &char_type = str.content().type().subtype();
179  const symbol_exprt idx = fresh_exist_index("index_trim", index_type);
180  const exprt space_char = from_integer(' ', char_type);
181 
182  // Axiom 1.
183  lemmas.push_back(str.axiom_for_length_ge(plus_exprt(idx, res.length())));
184 
185  binary_relation_exprt a2(idx, ID_ge, from_integer(0, index_type));
186  lemmas.push_back(a2);
187 
188  exprt a3=str.axiom_for_length_ge(idx);
189  lemmas.push_back(a3);
190 
191  exprt a4=res.axiom_for_length_ge(
193  lemmas.push_back(a4);
194 
195  exprt a5 = res.axiom_for_length_le(str.length());
196  lemmas.push_back(a5);
197 
198  symbol_exprt n=fresh_univ_index("QA_index_trim", index_type);
199  binary_relation_exprt non_print(str[n], ID_le, space_char);
200  string_constraintt a6(n, zero_if_negative(idx), non_print);
201  constraints.push_back(a6);
202 
203  // Axiom 7.
204  constraints.push_back([&] {
205  const symbol_exprt n2 = fresh_univ_index("QA_index_trim2", index_type);
206  const minus_exprt bound(minus_exprt(str.length(), idx), res.length());
207  const binary_relation_exprt eqn2(
208  str[plus_exprt(idx, plus_exprt(res.length(), n2))], ID_le, space_char);
209  return string_constraintt(n2, zero_if_negative(bound), eqn2);
210  }());
211 
212  symbol_exprt n3=fresh_univ_index("QA_index_trim3", index_type);
213  equal_exprt eqn3(res[n3], str[plus_exprt(n3, idx)]);
214  string_constraintt a8(n3, zero_if_negative(res.length()), eqn3);
215  constraints.push_back(a8);
216 
217  // Axiom 9.
218  lemmas.push_back([&] {
219  const plus_exprt index_before(
220  idx, minus_exprt(res.length(), from_integer(1, index_type)));
221  const binary_relation_exprt no_space_before(
222  str[index_before], ID_gt, space_char);
223  return or_exprt(
224  equal_exprt(idx, str.length()),
225  and_exprt(
226  binary_relation_exprt(str[idx], ID_gt, space_char), no_space_before));
227  }());
228  return from_integer(0, f.type());
229 }
230 
241 {
242  PRECONDITION(f.arguments().size() == 3);
243  const array_string_exprt res =
244  char_array_of_pointer(f.arguments()[1], f.arguments()[0]);
245  const array_string_exprt str = get_string_expr(f.arguments()[2]);
246  return add_axioms_for_to_lower_case(res, str);
247 }
248 
251 static exprt is_upper_case(const exprt &character)
252 {
253  const exprt char_A = from_integer('A', character.type());
254  const exprt char_Z = from_integer('Z', character.type());
255  exprt::operandst upper_case;
256  // Characters between 'A' and 'Z' are upper-case
257  upper_case.push_back(
258  and_exprt(
259  binary_relation_exprt(char_A, ID_le, character),
260  binary_relation_exprt(character, ID_le, char_Z)));
261 
262  // Characters between 0xc0 (latin capital A with grave)
263  // and 0xd6 (latin capital O with diaeresis) are upper-case
264  upper_case.push_back(
265  and_exprt(
267  from_integer(0xc0, character.type()), ID_le, character),
269  character, ID_le, from_integer(0xd6, character.type()))));
270 
271  // Characters between 0xd8 (latin capital O with stroke)
272  // and 0xde (latin capital thorn) are upper-case
273  upper_case.push_back(
274  and_exprt(
276  from_integer(0xd8, character.type()), ID_le, character),
278  character, ID_le, from_integer(0xde, character.type()))));
279  return disjunction(upper_case);
280 }
281 
284 static exprt is_lower_case(const exprt &character)
285 {
286  return is_upper_case(
287  minus_exprt(character, from_integer(0x20, character.type())));
288 }
289 
301  const array_string_exprt &res,
302  const array_string_exprt &str)
303 {
304  const typet &char_type = res.type().subtype();
305  const typet &index_type = res.length().type();
306  const exprt char_A = from_integer('A', char_type);
307  const exprt char_Z = from_integer('Z', char_type);
308 
309  // \todo for now, only characters in Basic Latin and Latin-1 supplement
310  // are supported (up to 0x100), we should add others using case mapping
311  // information from the UnicodeData file.
312 
313  lemmas.push_back(equal_exprt(res.length(), str.length()));
314 
315  constraints.push_back([&] {
316  const symbol_exprt idx = fresh_univ_index("QA_lower_case", index_type);
317  const exprt conditional_convert = [&] {
318  // The difference between upper-case and lower-case for the basic latin and
319  // latin-1 supplement is 0x20.
320  const exprt diff = from_integer(0x20, char_type);
321  const exprt converted = equal_exprt(res[idx], plus_exprt(str[idx], diff));
322  const exprt non_converted = equal_exprt(res[idx], str[idx]);
323  return if_exprt(is_upper_case(str[idx]), converted, non_converted);
324  }();
325 
326  return string_constraintt(
327  idx, zero_if_negative(res.length()), conditional_convert);
328  }());
329 
330  return from_integer(0, get_return_code_type());
331 }
332 
347  const array_string_exprt &res,
348  const array_string_exprt &str)
349 {
350  const typet &char_type = str.content().type().subtype();
351  const typet &index_type = str.length().type();
352  exprt char_a=constant_char('a', char_type);
353  exprt char_A=constant_char('A', char_type);
354  exprt char_z=constant_char('z', char_type);
355 
356  lemmas.push_back(equal_exprt(res.length(), str.length()));
357 
358  constraints.push_back([&] {
359  const symbol_exprt idx = fresh_univ_index("QA_upper_case", index_type);
360  const exprt converted =
361  minus_exprt(str[idx], from_integer(0x20, char_type));
362  return string_constraintt(
363  idx,
364  zero_if_negative(res.length()),
365  equal_exprt(
366  res[idx], if_exprt(is_lower_case(str[idx]), converted, str[idx])));
367  }());
368 
369  return from_integer(0, get_return_code_type());
370 }
371 
374 // NOLINTNEXTLINE
376 // NOLINTNEXTLINE
385 {
386  PRECONDITION(f.arguments().size() == 3);
387  array_string_exprt res =
388  char_array_of_pointer(f.arguments()[1], f.arguments()[0]);
390  return add_axioms_for_to_upper_case(res, str);
391 }
392 
400 {
401  PRECONDITION(f.arguments().size() == 5);
402  const array_string_exprt str = get_string_expr(f.arguments()[2]);
403  const array_string_exprt res =
404  char_array_of_pointer(f.arguments()[1], f.arguments()[0]);
405  const exprt &position = f.arguments()[3];
406  const exprt &character = f.arguments()[4];
407  return add_axioms_for_set_char(res, str, position, character);
408 }
409 
422  const array_string_exprt &res,
423  const array_string_exprt &str,
424  const exprt &position,
425  const exprt &character)
426 {
427  const binary_relation_exprt out_of_bounds(position, ID_ge, str.length());
428  lemmas.push_back(equal_exprt(res.length(), str.length()));
429  lemmas.push_back(
431  and_exprt(
433  from_integer(0, position.type()), ID_le, position),
434  binary_relation_exprt(position, ID_lt, res.length())),
435  equal_exprt(res[position], character)));
436  constraints.push_back([&] {
437  const symbol_exprt q = fresh_univ_index("QA_char_set", position.type());
438  const equal_exprt a3_body(res[q], str[q]);
439  return string_constraintt(
440  q, minimum(zero_if_negative(res.length()), position), a3_body);
441  }());
442  constraints.push_back([&] {
443  const symbol_exprt q2 = fresh_univ_index("QA_char_set2", position.type());
444  const plus_exprt lower_bound(position, from_integer(1, position.type()));
445  const equal_exprt a4_body(res[q2], str[q2]);
446  return string_constraintt(
447  q2, lower_bound, zero_if_negative(res.length()), a4_body);
448  }());
449  return if_exprt(
450  out_of_bounds,
453 }
454 
463  exprt expr1,
464  exprt expr2,
465  std::function<array_string_exprt(const exprt &)> get_string_expr)
466 {
467  if((expr1.type().id()==ID_unsignedbv
468  || expr1.type().id()==ID_char)
469  && (expr2.type().id()==ID_char
470  || expr2.type().id()==ID_unsignedbv))
471  return std::make_pair(expr1, expr2);
472  const auto expr1_str = get_string_expr(expr1);
473  const auto expr2_str = get_string_expr(expr2);
474  const auto expr1_length = numeric_cast<std::size_t>(expr1_str.length());
475  const auto expr2_length = numeric_cast<std::size_t>(expr2_str.length());
476  if(expr1_length && expr2_length && *expr1_length==1 && *expr2_length==1)
477  return std::make_pair(exprt(expr1_str[0]), exprt(expr2_str[0]));
478  return { };
479 }
480 
502 {
503  PRECONDITION(f.arguments().size() == 5);
505  array_string_exprt res =
506  char_array_of_pointer(f.arguments()[1], f.arguments()[0]);
507  if(
508  const auto maybe_chars =
509  to_char_pair(f.arguments()[3], f.arguments()[4], [this](const exprt &e) {
510  return get_string_expr(e);
511  }))
512  {
513  const auto old_char=maybe_chars->first;
514  const auto new_char=maybe_chars->second;
515 
516  lemmas.push_back(equal_exprt(res.length(), str.length()));
517 
518  symbol_exprt qvar = fresh_univ_index("QA_replace", str.length().type());
519  implies_exprt case1(
520  equal_exprt(str[qvar], old_char),
521  equal_exprt(res[qvar], new_char));
522  implies_exprt case2(
523  not_exprt(equal_exprt(str[qvar], old_char)),
524  equal_exprt(res[qvar], str[qvar]));
526  qvar, zero_if_negative(res.length()), and_exprt(case1, case2));
527  constraints.push_back(a2);
528  return from_integer(0, f.type());
529  }
530  return from_integer(1, f.type());
531 }
532 
539 {
540  PRECONDITION(f.arguments().size() == 4);
541  const array_string_exprt res =
542  char_array_of_pointer(f.arguments()[1], f.arguments()[0]);
543  const array_string_exprt str = get_string_expr(f.arguments()[2]);
544  exprt index_one=from_integer(1, str.length().type());
545  return add_axioms_for_delete(
546  res, str, f.arguments()[3], plus_exprt(f.arguments()[3], index_one));
547 }
548 
564  const array_string_exprt &res,
565  const array_string_exprt &str,
566  const exprt &start,
567  const exprt &end)
568 {
569  PRECONDITION(start.type()==str.length().type());
570  PRECONDITION(end.type()==str.length().type());
571  const typet &index_type = str.length().type();
572  const typet &char_type = str.content().type().subtype();
575  const exprt return_code1 = add_axioms_for_substring(
576  sub1, str, from_integer(0, str.length().type()), start);
577  const exprt return_code2 =
578  add_axioms_for_substring(sub2, str, end, str.length());
579  const exprt return_code3 = add_axioms_for_concat(res, sub1, sub2);
580  return bitor_exprt(return_code1, bitor_exprt(return_code2, return_code3));
581 }
582 
585 // NOLINTNEXTLINE
587 // NOLINTNEXTLINE
597 {
598  PRECONDITION(f.arguments().size() == 5);
599  const array_string_exprt res =
600  char_array_of_pointer(f.arguments()[1], f.arguments()[0]);
601  const array_string_exprt arg = get_string_expr(f.arguments()[2]);
602  return add_axioms_for_delete(res, arg, f.arguments()[3], f.arguments()[4]);
603 }
The type of an expression.
Definition: type.h:22
Boolean negation.
Definition: std_expr.h:3228
Generates string constraints to link results from string functions with their arguments.
A generic base class for relations, i.e., binary predicates.
Definition: std_expr.h:752
application of (mathematical) function
Definition: std_expr.h:4529
boolean OR
Definition: std_expr.h:2391
argumentst & arguments()
Definition: std_expr.h:4562
symbol_exprt fresh_univ_index(const irep_idt &prefix, const typet &type)
generate an index symbol to be used as an universaly quantified variable
The trinary if-then-else operator.
Definition: std_expr.h:3359
typet & type()
Definition: expr.h:56
exprt add_axioms_for_to_upper_case(const array_string_exprt &res, const array_string_exprt &expr)
Add axioms ensuring res corresponds to str in which lowercase characters of Basic Latin and Latin-1 s...
exprt add_axioms_for_trim(const function_application_exprt &f)
Remove leading and trailing whitespaces.
static optionalt< std::pair< exprt, exprt > > to_char_pair(exprt expr1, exprt expr2, std::function< array_string_exprt(const exprt &)> get_string_expr)
Convert two expressions to pair of chars If both expressions are characters, return pair of them If b...
exprt minimum(const exprt &a, const exprt &b)
boolean implication
Definition: std_expr.h:2339
equality
Definition: std_expr.h:1354
const irep_idt & id() const
Definition: irep.h:259
Bit-wise OR.
Definition: std_expr.h:2583
exprt add_axioms_for_to_lower_case(const array_string_exprt &res, const array_string_exprt &str)
Add axioms ensuring res corresponds to str in which uppercase characters have been converted to lower...
exprt add_axioms_for_delete_char_at(const function_application_exprt &expr)
add axioms corresponding to the StringBuilder.deleteCharAt java function
exprt::operandst argumentst
Definition: std_expr.h:4560
Fixed-width bit-vector with two&#39;s complement interpretation.
Definition: std_types.h:1271
exprt add_axioms_for_replace(const function_application_exprt &f)
Replace a character by another in a string.
nonstd::optional< T > optionalt
Definition: optional.h:35
boolean AND
Definition: std_expr.h:2255
optionalt< Target > numeric_cast(const exprt &arg)
Converts an expression to any integral type.
Definition: arith_tools.h:122
#define PRECONDITION(CONDITION)
Definition: invariant.h:242
exprt add_axioms_for_set_length(const function_application_exprt &f)
Reduce or extend a string to have the given length.
The plus expression.
Definition: std_expr.h:893
bitvector_typet index_type()
Definition: c_types.cpp:16
exprt add_axioms_for_char_set(const function_application_exprt &f)
Set a character to a specific value at an index of the string.
exprt disjunction(const exprt::operandst &op)
Definition: std_expr.cpp:24
std::vector< exprt > operandst
Definition: expr.h:45
exprt maximum(const exprt &a, const exprt &b)
static constant_exprt constant_char(int i, const typet &char_type)
generate constant character expression with character type.
exprt zero_if_negative(const exprt &expr)
Returns a non-negative version of the argument.
symbol_exprt fresh_exist_index(const irep_idt &prefix, const typet &type)
generate an index symbol which is existentially quantified
exprt add_axioms_for_delete(const array_string_exprt &res, const array_string_exprt &str, const exprt &start, const exprt &end)
Add axioms stating that res corresponds to the input str where we removed characters between the posi...
exprt add_axioms_for_substring(const array_string_exprt &res, const array_string_exprt &str, const exprt &start, const exprt &end)
Add axioms ensuring that res corresponds to the substring of str between indexes ‘start’ = max(st...
Base class for all expressions.
Definition: expr.h:42
exprt add_axioms_for_set_char(const array_string_exprt &res, const array_string_exprt &str, const exprt &position, const exprt &character)
Add axioms ensuring that the result res is similar to input string str where the character at index p...
Universally quantified string constraint
static exprt is_lower_case(const exprt &character)
Expression which is true for lower_case characters of the Basic Latin and Latin-1 supplement of unico...
binary minus
Definition: std_expr.h:959
Expression to hold a symbol (variable)
Definition: std_expr.h:90
int8_t s1
Definition: bytecode_info.h:59
const array_string_exprt & char_array_of_pointer(const exprt &pointer, const exprt &length)
Adds creates a new array if it does not already exists.
const typet & subtype() const
Definition: type.h:33
std::vector< string_constraintt > constraints
array_string_exprt fresh_string(const typet &index_type, const typet &char_type)
construct a string expression whose length and content are new variables
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
bitvector_typet char_type()
Definition: c_types.cpp:114
exprt add_axioms_for_concat(const array_string_exprt &res, const array_string_exprt &s1, const array_string_exprt &s2)
Add axioms enforcing that res is equal to the concatenation of s1 and s2.
array_string_exprt get_string_expr(const exprt &expr)
casts an expression to a string expression, or fetches the actual string_exprt in the case of a symbo...
static exprt is_upper_case(const exprt &character)
Expression which is true for uppercase characters of the Basic Latin and Latin-1 supplement of unicod...