diff --git a/ci/scripts/build_example.sh b/ci/scripts/build_example.sh index 77abbc310..41714cf32 100755 --- a/ci/scripts/build_example.sh +++ b/ci/scripts/build_example.sh @@ -23,7 +23,8 @@ source_dir=${1} build_dir=${1}/build run_example=${ICEBERG_RUN_EXAMPLE:-OFF} -mkdir ${build_dir} +rm -rf "${build_dir}" +mkdir "${build_dir}" pushd ${build_dir} is_windows() { @@ -60,6 +61,3 @@ else fi popd - -# clean up between builds -rm -rf ${build_dir} diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index aa051cd14..94523fb54 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -49,6 +49,7 @@ set(ICEBERG_SOURCES inheritable_metadata.cc json_serde.cc location_provider.cc + logging/cerr_logger.cc logging/logger.cc manifest/manifest_adapter.cc manifest/manifest_entry.cc @@ -133,6 +134,7 @@ set(ICEBERG_SOURCES util/struct_like_set.cc util/task_group.cc util/temporal_util.cc + util/thread_util.cc util/timepoint.cc util/transform_util.cc util/truncate_util.cc diff --git a/src/iceberg/logging/cerr_logger.cc b/src/iceberg/logging/cerr_logger.cc new file mode 100644 index 000000000..0856d963f --- /dev/null +++ b/src/iceberg/logging/cerr_logger.cc @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/logging/cerr_logger.h" + +#include +#include +#include +#include +#include +#include + +#include "iceberg/util/thread_util_internal.h" + +namespace iceberg { + +namespace { + +/// \brief Trailing path component of a source file path. +std::string_view Basename(std::string_view path) noexcept { + auto pos = path.find_last_of("/\\"); + return pos == std::string_view::npos ? path : path.substr(pos + 1); +} + +/// \brief Format a record into a single newline-terminated line. +std::string FormatLine(const LogMessage& message) { + auto now = + std::chrono::floor(std::chrono::system_clock::now()); + return std::format("{:%Y-%m-%dT%H:%M:%S}Z {} [{}] [{}:{}] {}\n", now, + ToString(message.level), OsThreadId(), + Basename(message.location.file_name()), message.location.line(), + message.message); +} + +} // namespace + +void CerrLogger::Log(LogMessage&& message) noexcept { + try { + std::string line = FormatLine(message); + std::lock_guard lock(mutex_); + std::cerr << line; + } catch (...) { + // Logging must never throw. Reached if either formatting or the write fails; + // emit a short marker best-effort and swallow anything further. + try { + std::lock_guard lock(mutex_); + std::cerr << "\n"; + } catch (...) { + } + } +} + +void CerrLogger::Flush() noexcept { + try { + std::lock_guard lock(mutex_); + std::cerr.flush(); + } catch (...) { + } +} + +} // namespace iceberg diff --git a/src/iceberg/logging/cerr_logger.h b/src/iceberg/logging/cerr_logger.h new file mode 100644 index 000000000..334f20391 --- /dev/null +++ b/src/iceberg/logging/cerr_logger.h @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/logging/cerr_logger.h +/// \brief Always-available std::cerr logging backend. + +#include +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/logging/log_level.h" +#include "iceberg/logging/logger.h" + +namespace iceberg { + +/// \brief Logger that writes one line per record to std::cerr. +/// +/// Line layout: `YYYY-MM-DDThh:mm:ss.mmmZ LEVEL [tid] [file:line] message`. +/// The minimum level is held in a lock-free atomic; a mutex serializes the +/// whole-line write so concurrent records never interleave. Pure standard +/// library -- always compiled, regardless of ICEBERG_SPDLOG. +class ICEBERG_EXPORT CerrLogger : public Logger { + public: + explicit CerrLogger(LogLevel level = LogLevel::kInfo) : level_(level) {} + + bool ShouldLog(LogLevel level) const noexcept override { + return level >= level_.load(std::memory_order_relaxed); + } + void Log(LogMessage&& message) noexcept override; + void SetLevel(LogLevel level) noexcept override { + level_.store(level, std::memory_order_relaxed); + } + LogLevel level() const noexcept override { + return level_.load(std::memory_order_relaxed); + } + void Flush() noexcept override; + + private: + std::atomic level_; + std::mutex mutex_; +}; + +} // namespace iceberg diff --git a/src/iceberg/logging/logger.cc b/src/iceberg/logging/logger.cc index 3adbf75dd..a8db67ca9 100644 --- a/src/iceberg/logging/logger.cc +++ b/src/iceberg/logging/logger.cc @@ -26,6 +26,8 @@ #include #include +#include "iceberg/logging/cerr_logger.h" + namespace iceberg { namespace { @@ -42,10 +44,9 @@ class NoopLogger final : public Logger { /// \brief Construct the process default logger for this build configuration. /// -/// This block ships only the interface and the no-op logger; the concrete -/// std::cerr and spdlog sinks (and the build-config selection between them) -/// arrive in later blocks, which update this factory. -std::shared_ptr MakeDefaultLogger() { return std::make_shared(); } +/// Uses the always-available std::cerr sink. The spdlog backend (preferred when +/// compiled in) is wired into this factory in a later block. +std::shared_ptr MakeDefaultLogger() { return std::make_shared(); } /// \brief The process-global default-logger slot. struct DefaultSlot { diff --git a/src/iceberg/logging/logger.h b/src/iceberg/logging/logger.h index 8a642f833..66f81501a 100644 --- a/src/iceberg/logging/logger.h +++ b/src/iceberg/logging/logger.h @@ -127,8 +127,8 @@ class ICEBERG_EXPORT LogMessage::Builder { /// \brief Well-known Logger::Initialize() property keys. /// /// `level` is honored by the base Logger::Initialize (parsed via -/// LogLevelFromString). `pattern` is honored by the formatting sinks -/// (CerrLogger, SpdLogger). +/// LogLevelFromString) on every backend. `pattern` is honored only by the +/// spdlog backend; CerrLogger uses a fixed layout and ignores it. inline constexpr std::string_view kLevelProperty = "level"; inline constexpr std::string_view kPatternProperty = "pattern"; @@ -150,8 +150,8 @@ class ICEBERG_EXPORT Logger { /// /// The base implementation applies the "level" property (parsed via /// LogLevelFromString); an unrecognized value is an InvalidArgument error. - /// Formatting sinks override this to also apply "pattern" and then delegate - /// to this base for "level". + /// The spdlog backend overrides this to also apply "pattern" and then delegates + /// to this base for "level"; CerrLogger uses the base as-is (fixed layout). virtual Status Initialize( const std::unordered_map& properties) { if (auto it = properties.find(std::string(kLevelProperty)); it != properties.end()) { diff --git a/src/iceberg/logging/meson.build b/src/iceberg/logging/meson.build index 3f7af4fec..e4ca111f4 100644 --- a/src/iceberg/logging/meson.build +++ b/src/iceberg/logging/meson.build @@ -15,4 +15,7 @@ # specific language governing permissions and limitations # under the License. -install_headers(['log_level.h', 'logger.h'], subdir: 'iceberg/logging') +install_headers( + ['cerr_logger.h', 'log_level.h', 'logger.h'], + subdir: 'iceberg/logging', +) diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build index 58d79a402..71a4498f1 100644 --- a/src/iceberg/meson.build +++ b/src/iceberg/meson.build @@ -74,6 +74,7 @@ iceberg_sources = files( 'inspect/snapshots_table.cc', 'json_serde.cc', 'location_provider.cc', + 'logging/cerr_logger.cc', 'logging/logger.cc', 'manifest/manifest_adapter.cc', 'manifest/manifest_entry.cc', @@ -158,6 +159,7 @@ iceberg_sources = files( 'util/struct_like_set.cc', 'util/task_group.cc', 'util/temporal_util.cc', + 'util/thread_util.cc', 'util/timepoint.cc', 'util/transform_util.cc', 'util/truncate_util.cc', diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 0756c1eef..fcbc22126 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -102,7 +102,11 @@ add_iceberg_test(table_test table_test.cc table_update_test.cc) -add_iceberg_test(logging_test SOURCES log_level_test.cc logger_test.cc) +add_iceberg_test(logging_test + SOURCES + cerr_logger_test.cc + log_level_test.cc + logger_test.cc) add_iceberg_test(expression_test SOURCES diff --git a/src/iceberg/test/cerr_logger_test.cc b/src/iceberg/test/cerr_logger_test.cc new file mode 100644 index 000000000..9a44dfcac --- /dev/null +++ b/src/iceberg/test/cerr_logger_test.cc @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/logging/cerr_logger.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "iceberg/logging/log_level.h" +#include "iceberg/logging/logger.h" + +namespace iceberg { + +namespace { + +/// \brief RAII redirect of std::cerr to a stringstream for the test scope. +class CerrCapture { + public: + CerrCapture() : old_(std::cerr.rdbuf(buffer_.rdbuf())) {} + ~CerrCapture() { std::cerr.rdbuf(old_); } + std::string str() const { return buffer_.str(); } + + private: + std::ostringstream buffer_; + std::streambuf* old_; +}; + +LogMessage MakeMessage(LogLevel level, std::string text) { + return LogMessage{.level = level, + .message = std::move(text), + .location = std::source_location::current(), + .attributes = {}}; +} + +/// \brief First line of \p text, without the trailing newline. +std::string FirstLine(const std::string& text) { + auto pos = text.find('\n'); + return text.substr(0, pos == std::string::npos ? text.size() : pos); +} + +/// \brief A streambuf that throws on every write, to drive the never-throw path. +/// +/// Throws a payload-less exception on purpose: an exception type with a +/// heap-allocated what() string (e.g. std::runtime_error) trips an ASan +/// alloc-dealloc-mismatch as the string crosses the instrumented binary / +/// system libc++abi boundary during unwinding -- unrelated to the code here. +struct SinkFailure {}; +class ThrowingBuf : public std::streambuf { + protected: + int overflow(int /*ch*/) override { throw SinkFailure{}; } + std::streamsize xsputn(const char* /*s*/, std::streamsize /*n*/) override { + throw SinkFailure{}; + } +}; + +/// \brief RAII: point std::cerr at a throwing buffer with exceptions armed, and +/// restore all of cerr's state (buffer, exception mask, error flags) on exit -- +/// so a thrown assertion can never leak the dangling/throwing stream to other +/// tests. +class CerrThrowingGuard { + public: + CerrThrowingGuard() + : old_buf_(std::cerr.rdbuf(&buf_)), old_exc_(std::cerr.exceptions()) { + std::cerr.exceptions(std::ios::badbit | std::ios::failbit); // make << rethrow + } + ~CerrThrowingGuard() { + std::cerr.clear(); + std::cerr.exceptions(old_exc_); + std::cerr.rdbuf(old_buf_); + } + + private: + ThrowingBuf buf_; + std::streambuf* old_buf_; + std::ios_base::iostate old_exc_; +}; + +} // namespace + +TEST(CerrLoggerTest, DefaultLevelIsInfo) { + CerrLogger logger; + EXPECT_EQ(logger.level(), LogLevel::kInfo); + EXPECT_FALSE(logger.ShouldLog(LogLevel::kDebug)); + EXPECT_TRUE(logger.ShouldLog(LogLevel::kInfo)); + EXPECT_TRUE(logger.ShouldLog(LogLevel::kError)); +} + +TEST(CerrLoggerTest, SetLevelFilters) { + CerrLogger logger(LogLevel::kError); + EXPECT_FALSE(logger.ShouldLog(LogLevel::kWarn)); + logger.SetLevel(LogLevel::kTrace); + EXPECT_TRUE(logger.ShouldLog(LogLevel::kTrace)); +} + +TEST(CerrLoggerTest, LineContainsLevelAndMessage) { + CerrLogger logger; + CerrCapture capture; + logger.Log(MakeMessage(LogLevel::kError, "boom 42")); + std::string out = capture.str(); + EXPECT_NE(out.find("error"), std::string::npos); + EXPECT_NE(out.find("boom 42"), std::string::npos); + EXPECT_NE(out.find("cerr_logger_test.cc"), std::string::npos); + EXPECT_EQ(out.back(), '\n'); +} + +// The whole value of CerrLogger over a backend is its fixed line layout: +// `YYYY-MM-DDThh:mm:ss.mmmZ LEVEL [tid] [file:line] message`. Assert the shape, +// especially the .mmm millisecond field (which silently vanishes if the +// time_point is no longer floored to milliseconds). +TEST(CerrLoggerTest, LineMatchesFixedLayout) { + CerrLogger logger; + CerrCapture capture; + logger.Log(MakeMessage(LogLevel::kWarn, "hello world")); + const std::regex layout( + R"(^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z \w+ \[\d+\] \[\S+:\d+\] hello world$)"); + EXPECT_TRUE(std::regex_match(FirstLine(capture.str()), layout)) << capture.str(); +} + +// CerrLogger deliberately does not override Initialize: it inherits the base, +// which applies "level" and ignores unknown keys like "pattern". +TEST(CerrLoggerTest, InitializeAppliesLevelAndIgnoresPattern) { + CerrLogger logger; + auto status = logger.Initialize({{std::string(kLevelProperty), "warn"}, + {std::string(kPatternProperty), "ignored"}}); + ASSERT_TRUE(status.has_value()); + EXPECT_EQ(logger.level(), LogLevel::kWarn); +} + +TEST(CerrLoggerTest, FlushDoesNotThrow) { + CerrLogger logger; + CerrCapture capture; + logger.Log(MakeMessage(LogLevel::kError, "x")); + logger.Flush(); // best-effort; must not throw + EXPECT_NE(capture.str().find('x'), std::string::npos); +} + +// Log()/Flush() are noexcept: even when every write to the sink throws, they +// must swallow it (a leaked exception from a noexcept function -> std::terminate). +TEST(CerrLoggerTest, LogAndFlushNeverThrowWhenSinkThrows) { + CerrThrowingGuard guard; // cerr -> throwing sink; state restored on scope exit + CerrLogger logger(LogLevel::kTrace); + logger.Log(MakeMessage(LogLevel::kInfo, "x")); // write throws -> fallback also throws + logger.Flush(); + SUCCEED(); +} + +TEST(CerrLoggerTest, ConcurrentLogsDoNotInterleave) { + CerrLogger logger(LogLevel::kTrace); + CerrCapture capture; + constexpr int kThreads = 8; + constexpr int kPerThread = 50; + + std::vector threads; + for (int t = 0; t < kThreads; ++t) { + threads.emplace_back([&logger, t] { + for (int i = 0; i < kPerThread; ++i) { + // Distinct payload per record so an interleave would corrupt a line. + logger.Log(MakeMessage(LogLevel::kInfo, std::format("t{}-{}", t, i))); + } + }); + } + for (auto& thread : threads) thread.join(); + + // Each output line must be a complete, well-formed record ending in its own + // payload, and every (thread, index) payload must appear exactly once -- this + // proves the whole-line write is atomic, not merely that newlines were kept. + const std::regex line(R"(^.*\] (t\d+-\d+)$)"); + std::set seen; + std::istringstream in(capture.str()); + std::string row; + int lines = 0; + while (std::getline(in, row)) { + ++lines; + std::smatch m; + ASSERT_TRUE(std::regex_match(row, m, line)) << "garbled line: " << row; + EXPECT_TRUE(seen.insert(m[1].str()).second) << "duplicate payload: " << m[1].str(); + } + EXPECT_EQ(lines, kThreads * kPerThread); + EXPECT_EQ(seen.size(), static_cast(kThreads * kPerThread)); +} + +} // namespace iceberg diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index b01a61904..ba0f6230d 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -61,7 +61,13 @@ iceberg_tests = { 'table_update_test.cc', ), }, - 'logging_test': {'sources': files('log_level_test.cc', 'logger_test.cc')}, + 'logging_test': { + 'sources': files( + 'cerr_logger_test.cc', + 'log_level_test.cc', + 'logger_test.cc', + ), + }, 'expression_test': { 'sources': files( 'aggregate_test.cc', diff --git a/src/iceberg/util/thread_util.cc b/src/iceberg/util/thread_util.cc new file mode 100644 index 000000000..ed6cccfa9 --- /dev/null +++ b/src/iceberg/util/thread_util.cc @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/util/thread_util_internal.h" + +#if defined(_WIN32) +# include +#elif defined(__APPLE__) +# include +#else +# include + +# include +#endif + +namespace iceberg { + +uint64_t OsThreadId() noexcept { + static thread_local uint64_t tid = []() -> uint64_t { +#if defined(_WIN32) + return static_cast(::GetCurrentThreadId()); +#elif defined(__APPLE__) + uint64_t id = 0; + pthread_threadid_np(nullptr, &id); + return id; +#else + return static_cast(::syscall(SYS_gettid)); +#endif + }(); + return tid; +} + +} // namespace iceberg diff --git a/src/iceberg/util/thread_util_internal.h b/src/iceberg/util/thread_util_internal.h new file mode 100644 index 000000000..ebd18fca1 --- /dev/null +++ b/src/iceberg/util/thread_util_internal.h @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +namespace iceberg { + +/// \brief OS-native thread id for the calling thread, cached per thread. +/// +/// Returns the cross-process-correlatable id used by spdlog/glog (Linux +/// gettid, macOS pthread_threadid_np, Windows GetCurrentThreadId), not the +/// opaque std::thread::id -- and so avoids the std::formatter +/// (P2693) minimum-toolchain dependency. The first call on a thread performs +/// the OS query; later calls return the cached value (no syscall per call). +/// Declared here so the platform headers stay confined to the .cc. +uint64_t OsThreadId() noexcept; + +} // namespace iceberg