1//===- StringToOffsetTable.cpp - Emit a big concatenated string -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/TableGen/StringToOffsetTable.h"
10#include "llvm/Support/FormatVariadic.h"
11#include "llvm/Support/raw_ostream.h"
12#include "llvm/TableGen/Error.h"
13#include "llvm/TableGen/Main.h"
14
15using namespace llvm;
16
17unsigned StringToOffsetTable::GetOrAddStringOffset(StringRef Str) {
18 auto [II, Inserted] = StringOffset.insert(KV: {Str, size()});
19 if (Inserted) {
20 // Add the string to the aggregate if this is the first time found.
21 AggregateString.append(first: Str.begin(), last: Str.end());
22 if (AppendZero)
23 AggregateString += '\0';
24 }
25
26 return II->second;
27}
28
29void StringToOffsetTable::EmitStringTableDef(raw_ostream &OS,
30 const Twine &Name) const {
31 // This generates a `llvm::StringTable` which expects that entries are null
32 // terminated. So fail with an error if `AppendZero` is false.
33 if (!AppendZero)
34 PrintFatalError(Msg: "llvm::StringTable requires null terminated strings");
35
36 OS << formatv(Fmt: R"(
37#ifdef __GNUC__
38#pragma GCC diagnostic push
39#pragma GCC diagnostic ignored "-Woverlength-strings"
40#endif
41static constexpr char {}Storage[] = )",
42 Vals: Name);
43
44 // MSVC silently miscompiles string literals longer than 64k in some
45 // circumstances. The build system sets EmitLongStrLiterals to false when it
46 // detects that it is targetting MSVC. When that option is false and the
47 // string table is longer than 64k, emit it as an array of character
48 // literals.
49 bool UseChars = !EmitLongStrLiterals && AggregateString.size() > (64 * 1024);
50 OS << (UseChars ? "{\n" : "\n");
51
52 ListSeparator LineSep(UseChars ? ",\n" : "\n");
53 SmallVector<StringRef> Strings(split(Str: AggregateString, Separator: '\0'));
54 // We should always have an empty string at the start, and because these are
55 // null terminators rather than separators, we'll have one at the end as
56 // well. Skip the end one.
57 assert(Strings.front().empty() && "Expected empty initial string!");
58 assert(Strings.back().empty() &&
59 "Expected empty string at the end due to terminators!");
60 Strings.pop_back();
61 for (StringRef Str : Strings) {
62 OS << LineSep << " ";
63 // If we can, just emit this as a string literal to be concatenated.
64 if (!UseChars) {
65 OS << "\"";
66 OS.write_escaped(Str);
67 OS << "\\0\"";
68 continue;
69 }
70
71 ListSeparator CharSep(", ");
72 for (char C : Str) {
73 OS << CharSep << "'";
74 OS.write_escaped(Str: StringRef(&C, 1));
75 OS << "'";
76 }
77 OS << CharSep << "'\\0'";
78 }
79 OS << LineSep << (UseChars ? "};" : " ;");
80
81 OS << formatv(Fmt: R"(
82#ifdef __GNUC__
83#pragma GCC diagnostic pop
84#endif
85
86static constexpr llvm::StringTable
87{0} = {0}Storage;
88)",
89 Vals: Name);
90}
91
92void StringToOffsetTable::EmitString(raw_ostream &O) const {
93 // Escape the string.
94 SmallString<256> EscapedStr;
95 raw_svector_ostream(EscapedStr).write_escaped(Str: AggregateString);
96
97 O << " \"";
98 unsigned CharsPrinted = 0;
99 for (unsigned i = 0, e = EscapedStr.size(); i != e; ++i) {
100 if (CharsPrinted > 70) {
101 O << "\"\n \"";
102 CharsPrinted = 0;
103 }
104 O << EscapedStr[i];
105 ++CharsPrinted;
106
107 // Print escape sequences all together.
108 if (EscapedStr[i] != '\\')
109 continue;
110
111 assert(i + 1 < EscapedStr.size() && "Incomplete escape sequence!");
112 if (isDigit(C: EscapedStr[i + 1])) {
113 assert(isDigit(EscapedStr[i + 2]) && isDigit(EscapedStr[i + 3]) &&
114 "Expected 3 digit octal escape!");
115 O << EscapedStr[++i];
116 O << EscapedStr[++i];
117 O << EscapedStr[++i];
118 CharsPrinted += 3;
119 } else {
120 O << EscapedStr[++i];
121 ++CharsPrinted;
122 }
123 }
124 O << "\"";
125}
126

source code of llvm/lib/TableGen/StringToOffsetTable.cpp