Skip to content

Commit

Permalink
Added checksum checks for builtins and compiled code
Browse files Browse the repository at this point in the history
This is to ensure that when you have a compiled (C++) code build, there are no version mismatches
  • Loading branch information
aardappel committed May 1, 2024
1 parent 6d6b89c commit 5a1d4b5
Show file tree
Hide file tree
Showing 10 changed files with 72 additions and 30 deletions.
9 changes: 0 additions & 9 deletions dev/TODO.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,6 @@
"function foo with 2 arguments is ambiguous with the 1 version because of default arguments"
is super confusing.

- Compute a checksum of all code parsed by a particular build, and stick that both in the .pak file
header, and in the FlatBuffers metadata. That way, when doing a C++ build, these two numbers can
be checked against each other to ensure they are from the same build.
What is also fragile is the Lobster binary that compiled the bytecode compared to the one compiled into
tocpp compiled lobster code: if one has a different set of builtin functions, it ends up calling the
wrong function (since the bytecode contains just indices). So should also checksum some aspect of
the runtime. A commit id would be great but hard to add to the build process. Failing that, a checksum
of all runtime function names or whatever would be a start.

- Options for speeding up JIT mode.
Recent benchmarking has shown that while the C++ mode can be pretty fast (within 5x of native C++),
the JIT is surprisingly slow (sometimes up to 10x slower than C++ mode for pure number crunching).
Expand Down
1 change: 1 addition & 0 deletions dev/src/bytecode.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ table BytecodeFile {
ser_ids:[int];

build_info:string;
src_hash:ulong;
}

root_type BytecodeFile;
Expand Down
32 changes: 21 additions & 11 deletions dev/src/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,13 @@ bool IsCompressed(string_view filename) {

static const uint8_t *magic = (uint8_t *)"LPAK";
static const size_t magic_size = 4;
static const size_t header_size = magic_size + sizeof(int64_t) * 3;
static const size_t header_size = magic_size + sizeof(int64_t) * 4;
static const char *bcname = "bytecode.lbc";
static const int64_t current_version = 2;

// Widens `x` to int64_t and passes it through flatbuffers::EndianScalar, giving
// the value in the byte order used for the integers stored in the pakfile header.
template <typename T>
int64_t LE(T x) {
    const auto widened = int64_t(x);
    return flatbuffers::EndianScalar(widened);
}

string BuildPakFile(string &pakfile, string &bytecode, set<string> &files) {
string BuildPakFile(string &pakfile, string &bytecode, set<string> &files, uint64_t src_hash) {
// All offsets in 64bit, just in-case we ever want pakfiles > 4GB :)
// Since we're building this in memory, they can only be created by a 64bit build.
vector<int64_t> filestarts;
Expand Down Expand Up @@ -196,7 +197,9 @@ string BuildPakFile(string &pakfile, string &bytecode, set<string> &files) {
auto num = LE(filestarts.size());
// Finally the "header" (or do we call this a "tailer" ? ;)
auto header_start = pakfile.size();
auto version = LE(1);
auto version = LE(current_version);
auto src_hash_le = LE(src_hash);
pakfile.insert(pakfile.end(), (uint8_t *)&src_hash_le, (uint8_t *)(&src_hash_le + 1));
pakfile.insert(pakfile.end(), (uint8_t *)&num, (uint8_t *)(&num + 1));
pakfile.insert(pakfile.end(), (uint8_t *)&dirstart, (uint8_t *)(&dirstart + 1));
pakfile.insert(pakfile.end(), (uint8_t *)&version, (uint8_t *)(&version + 1));
Expand All @@ -208,7 +211,7 @@ string BuildPakFile(string &pakfile, string &bytecode, set<string> &files) {

// This just loads the directory part of a pakfile such that subsequent LoadFile calls know how
// to load from it.
bool LoadPakDir(const char *lpak) {
bool LoadPakDir(const char *lpak, uint64_t &src_hash_dest) {
// This supports reading from a pakfile > 4GB even on a 32bit system! (as long as individual
// files in it are <= 4GB).
auto plen = LoadFile(lpak, nullptr, 0, 0);
Expand All @@ -221,10 +224,11 @@ bool LoadPakDir(const char *lpak) {
memcpy(&r, p, sizeof(int64_t));
return LE(r);
};
auto num = read_unaligned64(header.c_str());
auto dirstart = read_unaligned64((int64_t *)header.c_str() + 1);
auto version = read_unaligned64((int64_t *)header.c_str() + 2);
if (version > 1) return false;
auto src_hash = (uint64_t)read_unaligned64((int64_t *)header.c_str());
auto num = read_unaligned64((int64_t *)header.c_str() + 1);
auto dirstart = read_unaligned64((int64_t *)header.c_str() + 2);
auto version = read_unaligned64((int64_t *)header.c_str() + 3);
if (version != current_version) return false;
if (dirstart > plen) return false;
string dir;
if (LoadFile(lpak, &dir, dirstart, plen - dirstart - (int64_t)header_size) < 0)
Expand All @@ -240,6 +244,7 @@ bool LoadPakDir(const char *lpak) {
LOG_INFO("pakfile dir: ", name, " : ", len);
AddPakFileEntry(lpak, name, off, len, read_unaligned64(uncompressed + i));
}
src_hash_dest = src_hash;
return true;
}

Expand Down Expand Up @@ -387,10 +392,11 @@ void Compile(NativeRegistry &nfr, string_view fn, string_view stringsource, stri
Optimizer opt(parser, st, tc, runtime_checks);
if (parsedump) *parsedump = parser.DumpAll(true);
CodeGen cg(parser, st, return_value, runtime_checks);
auto src_hash = lex.HashAll();
st.Serialize(cg.code, cg.type_table, cg.lineinfo, cg.sids, cg.stringtable, bytecode, cg.vtables,
filenames, cg.ser_ids);
filenames, cg.ser_ids, src_hash);
if (pakfile) {
auto err = BuildPakFile(*pakfile, bytecode, parser.pakfiles);
auto err = BuildPakFile(*pakfile, bytecode, parser.pakfiles, src_hash);
if (!err.empty()) THROW_OR_ABORT(err);
}
}
Expand Down Expand Up @@ -529,7 +535,8 @@ extern "C" int RunCompiledCodeMain(int argc, const char *const *argv, const uint
min_output_level = OUTPUT_WARN;
InitPlatform(GetMainDirFromExePath(argv[0]), aux_src_path, false, loader);
auto from_lpak = true;
if (!LoadPakDir("default.lpak")) {
uint64_t src_hash = 0;
if (!LoadPakDir("default.lpak", src_hash)) {
// FIXME: this is optional, we don't know if the compiled code wants to load this
// file, so we don't error or even warn if this file can't be found.
from_lpak = false;
Expand All @@ -548,6 +555,9 @@ extern "C" int RunCompiledCodeMain(int argc, const char *const *argv, const uint
};
for (int arg = 1; arg < argc; arg++) { vmargs.program_args.push_back(argv[arg]); }
lobster::VMAllocator vma(std::move(vmargs));
if (from_lpak && src_hash != vma.vm->bcf->src_hash()) {
THROW_OR_ABORT("lpak file from different version of the source code than the compiled code");
}
vma.vm->EvalProgram();
}
#ifdef USE_EXCEPTION_HANDLING
Expand Down
20 changes: 16 additions & 4 deletions dev/src/lobster/bytecode_generated.h
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,8 @@ struct BytecodeFile FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
VT_ENUMS = 32,
VT_VTABLES = 34,
VT_SER_IDS = 38,
VT_BUILD_INFO = 40
VT_BUILD_INFO = 40,
VT_SRC_HASH = 42
};
int32_t bytecode_version() const {
return GetField<int32_t>(VT_BYTECODE_VERSION, 0);
Expand Down Expand Up @@ -623,6 +624,9 @@ struct BytecodeFile FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
const ::flatbuffers::String *build_info() const {
return GetPointer<const ::flatbuffers::String *>(VT_BUILD_INFO);
}
// Accessor for the `src_hash` field of the BytecodeFile table (declared as
// `src_hash:ulong` in bytecode.fbs). The second argument to GetField is the
// default value (0) used for this field.
uint64_t src_hash() const {
return GetField<uint64_t>(VT_SRC_HASH, 0);
}
bool Verify(::flatbuffers::Verifier &verifier) const {
return VerifyTableStart(verifier) &&
VerifyField<int32_t>(verifier, VT_BYTECODE_VERSION, 4) &&
Expand Down Expand Up @@ -658,6 +662,7 @@ struct BytecodeFile FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table {
verifier.VerifyVector(ser_ids()) &&
VerifyOffset(verifier, VT_BUILD_INFO) &&
verifier.VerifyString(build_info()) &&
VerifyField<uint64_t>(verifier, VT_SRC_HASH, 8) &&
verifier.EndTable();
}
};
Expand Down Expand Up @@ -708,6 +713,9 @@ struct BytecodeFileBuilder {
void add_build_info(::flatbuffers::Offset<::flatbuffers::String> build_info) {
fbb_.AddOffset(BytecodeFile::VT_BUILD_INFO, build_info);
}
// Writes `src_hash` into the table being built, in the VT_SRC_HASH slot.
// The trailing 0 is the field's default value as passed to AddElement.
void add_src_hash(uint64_t src_hash) {
fbb_.AddElement<uint64_t>(BytecodeFile::VT_SRC_HASH, src_hash, 0);
}
explicit BytecodeFileBuilder(::flatbuffers::FlatBufferBuilder &_fbb)
: fbb_(_fbb) {
start_ = fbb_.StartTable();
Expand All @@ -734,8 +742,10 @@ inline ::flatbuffers::Offset<BytecodeFile> CreateBytecodeFile(
::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset<bytecode::Enum>>> enums = 0,
::flatbuffers::Offset<::flatbuffers::Vector<int32_t>> vtables = 0,
::flatbuffers::Offset<::flatbuffers::Vector<int32_t>> ser_ids = 0,
::flatbuffers::Offset<::flatbuffers::String> build_info = 0) {
::flatbuffers::Offset<::flatbuffers::String> build_info = 0,
uint64_t src_hash = 0) {
BytecodeFileBuilder builder_(_fbb);
builder_.add_src_hash(src_hash);
builder_.add_build_info(build_info);
builder_.add_ser_ids(ser_ids);
builder_.add_vtables(vtables);
Expand Down Expand Up @@ -768,7 +778,8 @@ inline ::flatbuffers::Offset<BytecodeFile> CreateBytecodeFileDirect(
const std::vector<::flatbuffers::Offset<bytecode::Enum>> *enums = nullptr,
const std::vector<int32_t> *vtables = nullptr,
const std::vector<int32_t> *ser_ids = nullptr,
const char *build_info = nullptr) {
const char *build_info = nullptr,
uint64_t src_hash = 0) {
auto bytecode__ = bytecode ? _fbb.CreateVector<int32_t>(*bytecode) : 0;
auto typetable__ = typetable ? _fbb.CreateVector<int32_t>(*typetable) : 0;
auto stringtable__ = stringtable ? _fbb.CreateVector<::flatbuffers::Offset<::flatbuffers::String>>(*stringtable) : 0;
Expand Down Expand Up @@ -797,7 +808,8 @@ inline ::flatbuffers::Offset<BytecodeFile> CreateBytecodeFileDirect(
enums__,
vtables__,
ser_ids__,
build_info__);
build_info__,
src_hash);
}

inline const bytecode::BytecodeFile *GetBytecodeFile(const void *buf) {
Expand Down
2 changes: 1 addition & 1 deletion dev/src/lobster/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ extern pair<string, iint> RunTCC(NativeRegistry &nfr,
int runtime_checks,
bool dump_leaks);

extern bool LoadPakDir(const char *lpak);
extern bool LoadPakDir(const char *lpak, uint64_t &src_hash_dest);
extern bool LoadByteCode(string &bytecode);
extern void RegisterBuiltin(NativeRegistry &natreg, const char *ns, const char *name,
void (* regfun)(NativeRegistry &));
Expand Down
6 changes: 4 additions & 2 deletions dev/src/lobster/idents.h
Original file line number Diff line number Diff line change
Expand Up @@ -1286,7 +1286,8 @@ struct SymbolTable {
string &bytecode,
vector<int> &vtables,
vector<pair<string, string>> &filenames,
vector<type_elem_t> &ser_ids) {
vector<type_elem_t> &ser_ids,
uint64_t src_hash) {
flatbuffers::FlatBufferBuilder fbb;
// Always serialize this first! that way it can easily be left out of the generated C code.
auto codevec = fbb.CreateVector(code);
Expand Down Expand Up @@ -1327,7 +1328,8 @@ struct SymbolTable {
fbb.CreateVector(enumoffsets),
fbb.CreateVector(vtables),
fbb.CreateVector((vector<int> &)ser_ids),
fbb.CreateString(build_info.c_str(), build_info.size()));
fbb.CreateString(build_info.c_str(), build_info.size()),
src_hash);
bytecode::FinishBytecodeFileBuffer(fbb, bcf);
bytecode.assign(fbb.GetBufferPointer(), fbb.GetBufferPointer() + fbb.GetSize());
}
Expand Down
8 changes: 8 additions & 0 deletions dev/src/lobster/lex.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,14 @@ struct Lex : LoadedFile {
FirstToken();
}

uint64_t HashAll() {
uint64_t h = 0xABADCAFEDEADBEEF;
for (auto &src : allsources) {
h ^= FNV1A64(*src.get());
}
return h;
}

void FirstToken() {
Next();
if (token == T_LINEFEED) Next();
Expand Down
11 changes: 11 additions & 0 deletions dev/src/lobster/natreg.h
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,17 @@ struct NativeRegistry {
auto it = nfunlookup.find(name);
return it != nfunlookup.end() ? it->second : nullptr;
}

// Computes a 64-bit checksum over the native functions in `nfuns`: each
// function's name, the names of its arguments, and its argument count, all in
// the iteration order of `nfuns`.
//
// Every value is folded in with an XOR followed by a multiply instead of a
// bare XOR. A bare XOR is commutative and self-cancelling, so reordering the
// functions, or the same argument name occurring an even number of times
// across functions, would leave the checksum unchanged even though the
// function set is laid out differently. Mixing in the argument count keeps a
// name from being interchangeable between the "function" and "argument" roles.
//
// Returns the checksum; two registries are expected to compare equal only if
// they list the same functions with the same arguments in the same order.
uint64_t HashAll() {
    const uint64_t mix_prime = 0x100000001B3;  // The 64-bit FNV prime.
    uint64_t h = 0xABADCAFEDEADBEEF;
    // Unsigned multiply wraps modulo 2^64, which is the intended behaviour.
    auto mix = [&](uint64_t v) { h = (h ^ v) * mix_prime; };
    for (auto nf : nfuns) {
        mix(FNV1A64(nf->name));
        uint64_t num_args = 0;
        for (auto &a : nf->args) {
            mix(FNV1A64(a.name));
            num_args++;
        }
        mix(num_args);
    }
    return h;
}
};

struct Line {
Expand Down
6 changes: 4 additions & 2 deletions dev/src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,10 @@ int main(int argc, char* argv[]) {

string bytecode_buffer;
if (fn.empty()) {
if (!LoadPakDir(default_lpak))
THROW_OR_ABORT("Lobster programming language compiler/runtime (version "
uint64_t src_hash = 0; // Don't care, from same file as bytecode.
if (!LoadPakDir(default_lpak, src_hash))
THROW_OR_ABORT(
"Lobster programming language compiler/runtime (version "
GIT_COMMIT_INFOSTR ")\nno arguments given - cannot load "
+ (default_lpak + helptext));
// This will now come from the pakfile.
Expand Down
7 changes: 6 additions & 1 deletion dev/src/tocpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,12 @@ string ToCPP(NativeRegistry &natreg, string &sd, string_view bytecode_buffer, bo
}
if (cpp) sd += "extern \"C\" ";
sd += "void compiled_entry_point(VMRef vm, StackPtr sp) {\n";
if (!cpp) sd += " Entry(sizeof(Value));\n";
if (cpp) {
append(sd, " if (vm.nfr.HashAll() != ", natreg.HashAll(),
") vm.BuiltinError(\"code compiled with mismatching builtin function library\");\n");
} else {
sd += " Entry(sizeof(Value));\n";
}
append(sd, " fun_", starting_point, "(vm, sp);\n}\n\n");
if (cpp) {
sd += "int main(int argc, char *argv[]) {\n";
Expand Down

0 comments on commit 5a1d4b5

Please sign in to comment.