cprover
convert_string_literal.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: C/C++ Language Conversion
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
11 
12 #include "convert_string_literal.h"
13 
14 #include <cassert>
15 
16 #include <util/arith_tools.h>
17 #include <util/c_types.h>
18 #include <util/unicode.h>
19 
20 #include "../string_constant.h"
21 
22 #include "unescape_string.h"
23 
24 std::basic_string<unsigned int> convert_one_string_literal(
25  const std::string &src)
26 {
27  assert(src.size()>=2);
28 
29  if(src[0]=='u' && src[1]=='8')
30  {
31  assert(src[src.size()-1]=='"');
32  assert(src[2]=='"');
33 
34  std::basic_string<unsigned int> value=
35  unescape_wide_string(std::string(src, 3, src.size()-4));
36 
37  // turn into utf-8
38  std::string utf8_value=utf32_to_utf8(value);
39 
40  // pad into wide string
41  value.resize(utf8_value.size());
42  for(std::size_t i=0; i<utf8_value.size(); i++)
43  value[i]=utf8_value[i];
44 
45  return value;
46  }
47  else if(src[0]=='L' || src[0]=='u' || src[0]=='U')
48  {
49  assert(src[src.size()-1]=='"');
50  assert(src[1]=='"');
51 
52  return unescape_wide_string(std::string(src, 2, src.size()-3));
53  }
54  else
55  {
56  assert(src[0]=='"');
57  assert(src[src.size()-1]=='"');
58 
59  std::string char_value=
60  unescape_string(std::string(src, 1, src.size()-2));
61 
62  // pad into wide string
63  std::basic_string<unsigned int> value;
64  value.resize(char_value.size());
65  for(std::size_t i=0; i<char_value.size(); i++)
66  value[i]=char_value[i];
67 
68  return value;
69  }
70 }
71 
72 exprt convert_string_literal(const std::string &src)
73 {
74  // note that 'src' could be a concatenation of string literals,
75  // e.g., something like "asd" "xyz".
76  // GCC allows "asd" L"xyz"!
77 
78  std::basic_string<unsigned int> value;
79 
80  char wide=0;
81 
82  for(std::size_t i=0; i<src.size(); i++)
83  {
84  char ch=src[i];
85 
86  // skip whitespace/newline
87  if(ch!='L' && ch!='u' && ch!='U' && ch!='"')
88  continue;
89 
90  if(ch=='L')
91  wide=ch;
92  if((ch=='u' || ch=='U') && i+1<src.size() && src[i+1]=='"')
93  wide=ch;
94 
95  // find start of sequence
96  std::size_t j=src.find('"', i);
97  if(j==std::string::npos)
98  throw "invalid string constant `"+src+"'";
99 
100  // find end of sequence, considering escaping
101  for(++j; j<src.size() && src[j]!='"'; ++j)
102  if(src[j]=='\\') // skip next character
103  ++j;
104 
105  assert(j<=src.size());
106  if(j==src.size())
107  throw "non-terminated string constant `"+src+"'";
108 
109  std::string tmp_src=std::string(src, i, j-i+1);
110  std::basic_string<unsigned int> tmp_value=
112  value.append(tmp_value);
113  i=j;
114  }
115 
116  if(wide!=0)
117  {
118  // add implicit trailing zero
119  value.push_back(0);
120 
121  // L is wchar_t, u is char16_t, U is char32_t.
122  typet subtype;
123 
124  switch(wide)
125  {
126  case 'L': subtype=wchar_t_type(); break;
127  case 'u': subtype=char16_t_type(); break;
128  case 'U': subtype=char32_t_type(); break;
129  default: assert(false);
130  }
131 
132  exprt result=exprt(ID_array);
133  result.set(ID_C_string_constant, true);
134  result.type()=typet(ID_array);
135  result.type().subtype()=subtype;
136  result.type().set(ID_size, from_integer(value.size(), index_type()));
137 
138  result.operands().resize(value.size());
139  for(std::size_t i=0; i<value.size(); i++)
140  result.operands()[i]=from_integer(value[i], subtype);
141 
142  return result;
143  }
144  else
145  {
146  std::string char_value;
147 
148  char_value.resize(value.size());
149 
150  for(std::size_t i=0; i<value.size(); i++)
151  {
152  // Loss of data here if value[i]>255.
153  // gcc issues a warning in this case.
154  char_value[i]=value[i];
155  }
156 
157  string_constantt result;
158  result.set_value(char_value);
159 
160  return result;
161  }
162 }
The type of an expression.
Definition: type.h:20
C/C++ Language Conversion.
void set_value(const irep_idt &value)
std::string utf32_to_utf8(const std::basic_string< unsigned int > &s)
Definition: unicode.cpp:143
unsignedbv_typet char32_t_type()
Definition: c_types.cpp:174
typet & type()
Definition: expr.h:60
std::string unescape_string(const std::string &src)
std::basic_string< unsigned int > convert_one_string_literal(const std::string &src)
ANSI-C Language Conversion.
bitvector_typet index_type()
Definition: c_types.cpp:15
unsignedbv_typet char16_t_type()
Definition: c_types.cpp:164
bitvector_typet wchar_t_type()
Definition: c_types.cpp:148
Base class for all expressions.
Definition: expr.h:46
exprt convert_string_literal(const std::string &src)
const typet & subtype() const
Definition: type.h:31
operandst & operands()
Definition: expr.h:70
constant_exprt from_integer(const mp_integer &int_value, const typet &type)
const irept & find(const irep_namet &name) const
Definition: irep.cpp:285
std::basic_string< unsigned int > unescape_wide_string(const std::string &src)
void set(const irep_namet &name, const irep_idt &value)
Definition: irep.h:214