diff --git a/CMakeLists.txt b/CMakeLists.txt index cb0b2ae2b..eb1391238 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,12 +49,13 @@ option(ICEBERG_BUILD_HIVE "Build hive (HMS) catalog client" OFF) option(ICEBERG_BUILD_SQL_CATALOG "Build SQL catalog client" OFF) # Built-in SQL catalog database connectors. Disable all of them to build a SQL # catalog that only works with a user-supplied CatalogStore. -option(ICEBERG_SQL_SQLITE "Build the SQLite connector for the SQL catalog" OFF) -option(ICEBERG_SQL_POSTGRESQL "Build the PostgreSQL connector for the SQL catalog" OFF) -option(ICEBERG_SQL_MYSQL "Build the MySQL connector for the SQL catalog" OFF) +option(ICEBERG_SQL_SQLITE "Build SQLite connector for SQL catalog" OFF) +option(ICEBERG_SQL_POSTGRESQL "Build PostgreSQL connector for SQL catalog" OFF) +option(ICEBERG_SQL_MYSQL "Build MySQL connector for SQL catalog" OFF) option(ICEBERG_S3 "Build with S3 support" OFF) option(ICEBERG_SIGV4 "Build with SigV4 support" OFF) option(ICEBERG_BUNDLE_AWSSDK "Bundle AWS SDK for S3/SigV4 support" ON) +option(ICEBERG_BUNDLE_THRIFT "Bundle Thrift (from Arrow) for Hive catalog" ON) option(ICEBERG_ENABLE_ASAN "Enable Address Sanitizer" OFF) option(ICEBERG_ENABLE_UBSAN "Enable Undefined Behavior Sanitizer" OFF) diff --git a/cmake_modules/FindThriftAlt.cmake b/cmake_modules/FindThriftAlt.cmake new file mode 100644 index 000000000..13820998b --- /dev/null +++ b/cmake_modules/FindThriftAlt.cmake @@ -0,0 +1,136 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# FindThriftAlt.cmake - locate an installed Apache Thrift C++ runtime. +# +# Named "ThriftAlt" rather than "Thrift" (following Arrow's FindThriftAlt.cmake) +# so it does not collide with a downstream project's own FindThrift.cmake, and +# so this module can itself call find_package(Thrift CONFIG) to reuse an +# upstream ThriftConfig.cmake without recursing into itself. +# +# Discovery order: +# 1. CONFIG mode (ThriftConfig.cmake), shipped by CMake-based Thrift installs +# (>= 0.13). When present it already provides a usable thrift::thrift. +# 2. pkg-config (thrift.pc), used by autotools installs and Homebrew. +# 3. A plain library / header search as a last resort (e.g. when neither a +# CMake config nor pkg-config is available). +# +# This module defines: +# ThriftAlt_FOUND - whether the Thrift C++ runtime was found +# ThriftAlt_VERSION - the detected Thrift version, if known +# thrift::thrift - imported target for the Thrift C++ runtime + +if(ThriftAlt_FOUND OR TARGET thrift::thrift) + set(ThriftAlt_FOUND TRUE) + return() +endif() + +# ---------------------------------------------------------------------- +# 1. CONFIG mode: reuse an upstream ThriftConfig.cmake when available. +# +# This module is intentionally NOT named FindThrift.cmake, so a CONFIG-mode +# find_package(Thrift) here resolves to the upstream package config rather than +# back to this file. + +set(_thriftalt_config_args CONFIG QUIET) +if(ThriftAlt_FIND_VERSION) + list(APPEND _thriftalt_config_args ${ThriftAlt_FIND_VERSION}) +endif() +find_package(Thrift ${_thriftalt_config_args}) +if(Thrift_FOUND AND TARGET thrift::thrift) + set(ThriftAlt_FOUND TRUE) + set(ThriftAlt_VERSION "${Thrift_VERSION}") +endif() + +# ---------------------------------------------------------------------- +# 2 + 3. pkg-config, then a plain library / header search. + +if(NOT ThriftAlt_FOUND) + find_package(PkgConfig QUIET) + if(PkgConfig_FOUND) + pkg_check_modules(THRIFT_PC QUIET thrift) + endif() + + find_library(ThriftAlt_LIB + NAMES thrift libthrift + HINTS ${THRIFT_PC_LIBDIR} ${THRIFT_PC_LIBRARY_DIRS} + PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib") + find_path(ThriftAlt_INCLUDE_DIR + NAMES thrift/Thrift.h + HINTS ${THRIFT_PC_INCLUDEDIR} ${THRIFT_PC_INCLUDE_DIRS} + PATH_SUFFIXES "include") + + if(THRIFT_PC_VERSION) + set(ThriftAlt_VERSION "${THRIFT_PC_VERSION}") + elseif(ThriftAlt_INCLUDE_DIR AND EXISTS "${ThriftAlt_INCLUDE_DIR}/thrift/config.h") + file(READ "${ThriftAlt_INCLUDE_DIR}/thrift/config.h" _thrift_config_h) + string(REGEX MATCH "#define PACKAGE_VERSION \"([0-9.]+)\"" _ "${_thrift_config_h}") + set(ThriftAlt_VERSION "${CMAKE_MATCH_1}") + endif() +endif() + +include(FindPackageHandleStandardArgs) +if(TARGET thrift::thrift) + # CONFIG mode already produced the target; satisfy REQUIRED_VARS with it. + find_package_handle_standard_args( + ThriftAlt + REQUIRED_VARS thrift::thrift + VERSION_VAR ThriftAlt_VERSION) +else() + find_package_handle_standard_args( + ThriftAlt + REQUIRED_VARS ThriftAlt_LIB ThriftAlt_INCLUDE_DIR + VERSION_VAR ThriftAlt_VERSION) +endif() + +if(ThriftAlt_FOUND AND NOT TARGET thrift::thrift) + add_library(thrift::thrift UNKNOWN IMPORTED) + set_target_properties(thrift::thrift + PROPERTIES IMPORTED_LOCATION "${ThriftAlt_LIB}" + INTERFACE_INCLUDE_DIRECTORIES + "${ThriftAlt_INCLUDE_DIR}") + if(WIN32) + set_property(TARGET thrift::thrift PROPERTY INTERFACE_LINK_LIBRARIES "ws2_32") + endif() +endif() + +# ---------------------------------------------------------------------- +# Boost headers. +# +# Thrift's public C++ headers include , but +# neither thrift.pc nor (older) ThriftConfig.cmake advertise Boost. Attach Boost +# headers so consumers of thrift::thrift can compile the generated bindings. +# +# Use CONFIG mode explicitly: the legacy FindBoost module was removed and emits +# a CMP0167 warning under cmake_minimum_required(VERSION < 3.30), which would +# surface in every downstream find_dependency(ThriftAlt). + +if(ThriftAlt_FOUND) + find_package(Boost CONFIG QUIET) + if(TARGET Boost::headers) + target_link_libraries(thrift::thrift INTERFACE Boost::headers) + elseif(Boost_INCLUDE_DIRS) + set_property(TARGET thrift::thrift APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES + "${Boost_INCLUDE_DIRS}") + else() + message(FATAL_ERROR "Apache Thrift's C++ headers require Boost headers, but Boost was not " + "found. Install Boost development headers (e.g. 'brew install boost' " + "or 'apt install libboost-dev').") + endif() +endif() + +mark_as_advanced(ThriftAlt_LIB ThriftAlt_INCLUDE_DIR) diff --git a/cmake_modules/IcebergThirdpartyToolchain.cmake b/cmake_modules/IcebergThirdpartyToolchain.cmake index 8e10fd8ec..825a5a054 100644 --- a/cmake_modules/IcebergThirdpartyToolchain.cmake +++ b/cmake_modules/IcebergThirdpartyToolchain.cmake @@ -27,6 +27,17 @@ if(ICEBERG_S3 AND ICEBERG_BUNDLE_AWSSDK) set(ICEBERG_AWSSDK_BUNDLED TRUE) endif() +# Mirror the AWS SDK bundle/system policy for Thrift (used by the Hive catalog): +# ICEBERG_BUNDLE_THRIFT is the user's intent, ICEBERG_THRIFT_BUNDLED the resolved +# conclusion that the rest of the build keys off. +set(ICEBERG_THRIFT_BUNDLED FALSE) +if(ICEBERG_BUILD_HIVE AND ICEBERG_BUNDLE_THRIFT) + if(NOT ICEBERG_BUILD_BUNDLE) + message(FATAL_ERROR "ICEBERG_BUNDLE_THRIFT requires ICEBERG_BUILD_BUNDLE to be ON") + endif() + set(ICEBERG_THRIFT_BUNDLED TRUE) +endif() + set(ICEBERG_AWSSDK_COMPONENTS) if(NOT ICEBERG_AWSSDK_BUNDLED) if(ICEBERG_S3) @@ -742,3 +753,39 @@ endif() if(ICEBERG_BUILD_SQL_CATALOG) resolve_sql_catalog_dependencies() endif() + +# ---------------------------------------------------------------------- +# Thrift (Hive catalog) +# +# Provide a `thrift::thrift` target for iceberg_hive's generated Hive Metastore +# bindings, either bundled (from Arrow's build) or from a system install. Must +# run after resolve_arrow_dependency() so the bundled `thrift` target exists. + +function(resolve_thrift_dependency) + if(NOT ICEBERG_BUILD_HIVE) + return() + endif() + if(ICEBERG_THRIFT_BUNDLED) + # Arrow's bundled build creates the Thrift C++ runtime as a `thrift` target + # scoped to its FetchContent directory, where iceberg_hive cannot see it. + # Promote it to a global `thrift::thrift` alias so iceberg_hive can link the + # generated Hive Metastore bindings against it. + if(TARGET thrift AND NOT TARGET thrift::thrift) + add_library(thrift::thrift INTERFACE IMPORTED GLOBAL) + target_link_libraries(thrift::thrift INTERFACE thrift) + endif() + else() + # System Thrift, located by cmake_modules/FindThriftAlt.cmake (MODULE mode), + # which provides the `thrift::thrift` target iceberg_hive expects. Record it + # as a system dependency so downstream find_package(Iceberg) re-finds it. + find_package(ThriftAlt MODULE REQUIRED GLOBAL) + list(APPEND ICEBERG_SYSTEM_DEPENDENCIES ThriftAlt) + set(ICEBERG_SYSTEM_DEPENDENCIES + ${ICEBERG_SYSTEM_DEPENDENCIES} + PARENT_SCOPE) + endif() +endfunction() + +if(ICEBERG_BUILD_HIVE) + resolve_thrift_dependency() +endif() diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 94523fb54..1bf1f10db 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -347,6 +347,14 @@ endif() iceberg_install_cmake_package(iceberg iceberg_targets) +# When linking a system Thrift, downstream find_package(Iceberg) calls +# find_dependency(ThriftAlt); ship FindThriftAlt.cmake next to the package config +# (the dir iceberg-config.cmake adds to CMAKE_MODULE_PATH) so that lookup resolves. +if(ICEBERG_BUILD_HIVE AND NOT ICEBERG_THRIFT_BUNDLED) + install(FILES "${PROJECT_SOURCE_DIR}/cmake_modules/FindThriftAlt.cmake" + DESTINATION "${ICEBERG_INSTALL_CMAKEDIR}/iceberg") +endif() + if(ICEBERG_BUILD_TESTS) add_subdirectory(test) endif() diff --git a/src/iceberg/catalog/hive/CMakeLists.txt b/src/iceberg/catalog/hive/CMakeLists.txt index 0dee1ad10..30c2a380a 100644 --- a/src/iceberg/catalog/hive/CMakeLists.txt +++ b/src/iceberg/catalog/hive/CMakeLists.txt @@ -15,11 +15,92 @@ # specific language governing permissions and limitations # under the License. -# Skeleton for the iceberg_hive library target. +# The iceberg_hive library: a Hive Metastore (HMS) catalog client built on +# generated Apache Thrift bindings. Layout mirrors iceberg_rest. + +# ---------------------------------------------------------------------- +# Hive Metastore Thrift bindings. # -# Sources, dependency wiring and the actual `iceberg_hive` library target -# are introduced in follow-up commits. For now this file installs only the -# public export header so that the directory is wired into the build system -# end-to-end. +# These are checked into gen-cpp/ rather than generated at build time, so a +# normal build needs no Thrift IDL compiler — only the Thrift C++ runtime, +# which comes from Apache Arrow's bundled build. Regenerate them with +# dev/update_hive_thrift.sh whenever thirdparty/hive_metastore/*.thrift changes. + +set(_thrift_gen_dir ${CMAKE_CURRENT_SOURCE_DIR}/gen-cpp) + +set(ICEBERG_HIVE_THRIFT_GEN_SOURCES + ${_thrift_gen_dir}/FacebookService.cpp + ${_thrift_gen_dir}/fb303_types.cpp + ${_thrift_gen_dir}/hive_metastore_constants.cpp + ${_thrift_gen_dir}/hive_metastore_types.cpp + ${_thrift_gen_dir}/ThriftHiveMetastore.cpp) + +# Upstream-generated code: skip lint/format, and downgrade the deprecated +# std::iterator warning from Arrow's bundled Thrift 0.22 headers (removed +# upstream in Thrift 0.23, THRIFT-5698) so it does not trip -Werror. +set_source_files_properties(${ICEBERG_HIVE_THRIFT_GEN_SOURCES} + PROPERTIES SKIP_LINTING TRUE + COMPILE_OPTIONS + "$<$:-Wno-error=deprecated-declarations>" +) + +# ---------------------------------------------------------------------- +# iceberg_hive library + +if(NOT TARGET thrift::thrift) + message(FATAL_ERROR "iceberg_hive requires a `thrift::thrift` target. Either keep " + "ICEBERG_BUNDLE_THRIFT=ON with -DICEBERG_BUILD_BUNDLE=ON (the " + "default, uses Arrow's bundled Thrift), or build with " + "-DICEBERG_BUNDLE_THRIFT=OFF against a system Thrift install.") +endif() + +set(ICEBERG_HIVE_SOURCES hive_catalog.cc hive_catalog_properties.cc + ${ICEBERG_HIVE_THRIFT_GEN_SOURCES}) + +set(ICEBERG_HIVE_STATIC_BUILD_INTERFACE_LIBS) +set(ICEBERG_HIVE_SHARED_BUILD_INTERFACE_LIBS) +set(ICEBERG_HIVE_STATIC_INSTALL_INTERFACE_LIBS) +set(ICEBERG_HIVE_SHARED_INSTALL_INTERFACE_LIBS) + +# thrift::thrift is always required at build time (the generated bindings include +# and link against it). For the *install* interface it depends on the Thrift source: +# * Bundled (from Arrow): the Thrift runtime is baked into the installed Arrow +# bundled-dependencies archive that iceberg core already carries, and there is +# no exported `thrift::thrift` target downstream. Referencing it in the install +# interface would make downstream find_package(Iceberg) fail with a missing +# `thrift::thrift` target, so it must be omitted. +# * System: downstream find_package(Iceberg) re-creates `thrift::thrift` via +# find_dependency(ThriftAlt) (FindThriftAlt.cmake is shipped next to the +# config), so it must be propagated. +list(APPEND ICEBERG_HIVE_STATIC_BUILD_INTERFACE_LIBS + "$,iceberg_static,iceberg_shared>" thrift::thrift) +list(APPEND ICEBERG_HIVE_SHARED_BUILD_INTERFACE_LIBS + "$,iceberg_shared,iceberg_static>" thrift::thrift) +list(APPEND + ICEBERG_HIVE_STATIC_INSTALL_INTERFACE_LIBS + "$,iceberg::iceberg_static,iceberg::iceberg_shared>" +) +list(APPEND + ICEBERG_HIVE_SHARED_INSTALL_INTERFACE_LIBS + "$,iceberg::iceberg_shared,iceberg::iceberg_static>" +) +if(NOT ICEBERG_THRIFT_BUNDLED) + list(APPEND ICEBERG_HIVE_STATIC_INSTALL_INTERFACE_LIBS thrift::thrift) + list(APPEND ICEBERG_HIVE_SHARED_INSTALL_INTERFACE_LIBS thrift::thrift) +endif() + +add_iceberg_lib(iceberg_hive + SOURCES + ${ICEBERG_HIVE_SOURCES} + PRIVATE_INCLUDES + ${_thrift_gen_dir} + SHARED_LINK_LIBS + ${ICEBERG_HIVE_SHARED_BUILD_INTERFACE_LIBS} + STATIC_LINK_LIBS + ${ICEBERG_HIVE_STATIC_BUILD_INTERFACE_LIBS} + STATIC_INSTALL_INTERFACE_LIBS + ${ICEBERG_HIVE_STATIC_INSTALL_INTERFACE_LIBS} + SHARED_INSTALL_INTERFACE_LIBS + ${ICEBERG_HIVE_SHARED_INSTALL_INTERFACE_LIBS}) iceberg_install_all_headers(iceberg/catalog/hive) diff --git a/src/iceberg/catalog/hive/hive_catalog.cc b/src/iceberg/catalog/hive/hive_catalog.cc new file mode 100644 index 000000000..1309babaa --- /dev/null +++ b/src/iceberg/catalog/hive/hive_catalog.cc @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/catalog/hive/hive_catalog.h" + +#include +#include + +#include "iceberg/util/macros.h" + +namespace iceberg::hive { + +namespace { + +constexpr std::string_view kNotImplementedMessage = + "HiveCatalog method is not yet implemented."; + +} // namespace + +HiveCatalog::HiveCatalog(HiveCatalogProperties config) + : config_(std::move(config)), name_(config_.Get(HiveCatalogProperties::kName)) {} + +HiveCatalog::~HiveCatalog() = default; + +Result> HiveCatalog::Make( + const HiveCatalogProperties& config) { + ICEBERG_RETURN_UNEXPECTED(config.Uri()); + return std::shared_ptr(new HiveCatalog(config)); +} + +std::string_view HiveCatalog::name() const { return name_; } + +Status HiveCatalog::CreateNamespace( + const Namespace& /*ns*/, + const std::unordered_map& /*properties*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result> HiveCatalog::ListNamespaces( + const Namespace& /*ns*/) const { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result> HiveCatalog::GetNamespaceProperties( + const Namespace& /*ns*/) const { + return NotImplemented("{}", kNotImplementedMessage); +} + +Status HiveCatalog::DropNamespace(const Namespace& /*ns*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result HiveCatalog::NamespaceExists(const Namespace& /*ns*/) const { + return NotImplemented("{}", kNotImplementedMessage); +} + +Status HiveCatalog::UpdateNamespaceProperties( + const Namespace& /*ns*/, + const std::unordered_map& /*updates*/, + const std::unordered_set& /*removals*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result> HiveCatalog::ListTables( + const Namespace& /*ns*/) const { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result> HiveCatalog::CreateTable( + const TableIdentifier& /*identifier*/, const std::shared_ptr& /*schema*/, + const std::shared_ptr& /*spec*/, + const std::shared_ptr& /*order*/, const std::string& /*location*/, + const std::unordered_map& /*properties*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result> HiveCatalog::UpdateTable( + const TableIdentifier& /*identifier*/, + const std::vector>& /*requirements*/, + const std::vector>& /*updates*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result> HiveCatalog::StageCreateTable( + const TableIdentifier& /*identifier*/, const std::shared_ptr& /*schema*/, + const std::shared_ptr& /*spec*/, + const std::shared_ptr& /*order*/, const std::string& /*location*/, + const std::unordered_map& /*properties*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result HiveCatalog::TableExists(const TableIdentifier& /*identifier*/) const { + return NotImplemented("{}", kNotImplementedMessage); +} + +Status HiveCatalog::DropTable(const TableIdentifier& /*identifier*/, bool /*purge*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +Status HiveCatalog::RenameTable(const TableIdentifier& /*from*/, + const TableIdentifier& /*to*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result> HiveCatalog::LoadTable( + const TableIdentifier& /*identifier*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +Result> HiveCatalog::RegisterTable( + const TableIdentifier& /*identifier*/, + const std::string& /*metadata_file_location*/) { + return NotImplemented("{}", kNotImplementedMessage); +} + +} // namespace iceberg::hive diff --git a/src/iceberg/catalog/hive/hive_catalog.h b/src/iceberg/catalog/hive/hive_catalog.h new file mode 100644 index 000000000..bd598fee9 --- /dev/null +++ b/src/iceberg/catalog/hive/hive_catalog.h @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include + +#include "iceberg/catalog.h" +#include "iceberg/catalog/hive/hive_catalog_properties.h" +#include "iceberg/catalog/hive/iceberg_hive_export.h" +#include "iceberg/result.h" + +/// \file iceberg/catalog/hive/hive_catalog.h +/// \brief HiveCatalog implementation for talking to a Hive Metastore (HMS). + +namespace iceberg::hive { + +/// \brief Catalog implementation backed by a Hive Metastore. +/// +/// Currently a stub: every Catalog method returns +/// ErrorKind::kNotImplemented. Follow-up changes add the HMS Thrift client +/// and wire each method to the metastore. +class ICEBERG_HIVE_EXPORT HiveCatalog : public Catalog, + public std::enable_shared_from_this { + public: + ~HiveCatalog() override; + + HiveCatalog(const HiveCatalog&) = delete; + HiveCatalog& operator=(const HiveCatalog&) = delete; + HiveCatalog(HiveCatalog&&) = delete; + HiveCatalog& operator=(HiveCatalog&&) = delete; + + /// \brief Construct a HiveCatalog from `config`. + /// + /// Only stores the configuration for now; HMS connection setup comes + /// with the Thrift client. Returns an error if the supplied + /// configuration is missing required fields (currently: the URI). + static Result> Make(const HiveCatalogProperties& config); + + std::string_view name() const override; + + Status CreateNamespace( + const Namespace& ns, + const std::unordered_map& properties) override; + + Result> ListNamespaces(const Namespace& ns) const override; + + Result> GetNamespaceProperties( + const Namespace& ns) const override; + + Status DropNamespace(const Namespace& ns) override; + + Result NamespaceExists(const Namespace& ns) const override; + + Status UpdateNamespaceProperties( + const Namespace& ns, const std::unordered_map& updates, + const std::unordered_set& removals) override; + + Result> ListTables(const Namespace& ns) const override; + + Result> CreateTable( + const TableIdentifier& identifier, const std::shared_ptr& schema, + const std::shared_ptr& spec, const std::shared_ptr& order, + const std::string& location, + const std::unordered_map& properties) override; + + Result> UpdateTable( + const TableIdentifier& identifier, + const std::vector>& requirements, + const std::vector>& updates) override; + + Result> StageCreateTable( + const TableIdentifier& identifier, const std::shared_ptr& schema, + const std::shared_ptr& spec, const std::shared_ptr& order, + const std::string& location, + const std::unordered_map& properties) override; + + Result TableExists(const TableIdentifier& identifier) const override; + + Status DropTable(const TableIdentifier& identifier, bool purge) override; + + Status RenameTable(const TableIdentifier& from, const TableIdentifier& to) override; + + Result> LoadTable(const TableIdentifier& identifier) override; + + Result> RegisterTable( + const TableIdentifier& identifier, + const std::string& metadata_file_location) override; + + private: + explicit HiveCatalog(HiveCatalogProperties config); + + HiveCatalogProperties config_; + std::string name_; +}; + +} // namespace iceberg::hive diff --git a/src/iceberg/catalog/hive/hive_catalog_properties.cc b/src/iceberg/catalog/hive/hive_catalog_properties.cc new file mode 100644 index 000000000..e09aa1627 --- /dev/null +++ b/src/iceberg/catalog/hive/hive_catalog_properties.cc @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/catalog/hive/hive_catalog_properties.h" + +#include +#include + +#include "iceberg/util/string_util.h" + +namespace iceberg::hive { + +HiveCatalogProperties HiveCatalogProperties::default_properties() { return {}; } + +HiveCatalogProperties HiveCatalogProperties::FromMap( + std::unordered_map properties) { + HiveCatalogProperties hive_catalog_config; + hive_catalog_config.configs_ = std::move(properties); + return hive_catalog_config; +} + +Result HiveCatalogProperties::Uri() const { + auto it = configs_.find(kUri.key()); + if (it == configs_.end() || it->second.empty()) { + return InvalidArgument("Hive catalog configuration property 'uri' is required."); + } + return it->second; +} + +Result HiveCatalogProperties::ThriftTransport() const { + const std::string upper = StringUtils::ToUpper(Get(kThriftTransport)); + if (upper == "BUFFERED") { + return HiveThriftTransport::kBuffered; + } + if (upper == "FRAMED") { + return HiveThriftTransport::kFramed; + } + return InvalidArgument("Invalid Hive thrift transport: '{}'.", Get(kThriftTransport)); +} + +} // namespace iceberg::hive diff --git a/src/iceberg/catalog/hive/hive_catalog_properties.h b/src/iceberg/catalog/hive/hive_catalog_properties.h new file mode 100644 index 000000000..5f1273b34 --- /dev/null +++ b/src/iceberg/catalog/hive/hive_catalog_properties.h @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include + +#include "iceberg/catalog/hive/iceberg_hive_export.h" +#include "iceberg/result.h" +#include "iceberg/util/config.h" + +/// \file iceberg/catalog/hive/hive_catalog_properties.h +/// \brief Configuration for connecting to a Hive Metastore (HMS) over Thrift. + +namespace iceberg::hive { + +/// \brief Thrift framing mode used to connect to the Hive Metastore. +/// +/// Most HMS deployments default to TBufferedTransport. TFramedTransport is +/// required by HMS instances that have been configured to use framed +/// transport (for example, certain Hive 3.x setups with SASL enabled). +enum class HiveThriftTransport : uint8_t { kBuffered, kFramed }; + +/// \brief Configuration for the iceberg_hive HiveCatalog. +/// +/// HMS connection settings (URI, transport, timeouts) plus warehouse / FileIO +/// metadata. Authentication (SASL/Kerberos) and HMS-side locking are +/// introduced in follow-up changes. +class ICEBERG_HIVE_EXPORT HiveCatalogProperties + : public ConfigBase { + public: + template + using Entry = const ConfigBase::Entry; + + /// \brief The URI of the Hive Metastore Thrift endpoint. + /// + /// Accepted forms (matching the conventions used by iceberg-java and + /// iceberg-rust): + /// * `thrift://host:port` + /// * `host:port` + /// * comma-separated list of either form for HA failover + inline static Entry kUri{"uri", ""}; + + /// \brief The catalog name reported by `name()`. Defaults to "hive". + inline static Entry kName{"name", "hive"}; + + /// \brief The warehouse root path (for example, `s3://bucket/warehouse` + /// or `hdfs://nn/path`). Used as the default base location for new + /// tables that do not specify their own location. + inline static Entry kWarehouse{"warehouse", ""}; + + /// \brief The FileIO implementation name used to read and write Iceberg + /// metadata files. + inline static Entry kIOImpl{"io-impl", ""}; + + /// \brief Thrift framing for the HMS connection ("buffered" or "framed"). + inline static Entry kThriftTransport{"thrift-transport", "buffered"}; + + /// \brief HMS connect timeout, in milliseconds. + inline static Entry kConnectTimeoutMs{"connect-timeout-ms", 30000}; + + /// \brief HMS socket / RPC timeout, in milliseconds. + inline static Entry kSocketTimeoutMs{"socket-timeout-ms", 60000}; + + /// \brief When true, wrap the commit path with HMS `lock` / `unlock` for + /// extra safety on top of the metadata_location CAS. Defaults to false + /// because CAS already handles single-writer correctness; turn this on + /// for environments with high write concurrency. + inline static Entry kLockEnabled{"hive.lock-enabled", false}; + + /// \brief Build a HiveCatalogProperties with defaults applied. + static HiveCatalogProperties default_properties(); + + /// \brief Build a HiveCatalogProperties from a property map. + static HiveCatalogProperties FromMap( + std::unordered_map properties); + + /// \brief Resolve `kUri`. Returns an error if the URI is unset or empty. + Result Uri() const; + + /// \brief Parse `kThriftTransport` into a HiveThriftTransport. Comparison + /// is case-insensitive to match the conventions used by other Iceberg + /// language ports. + Result ThriftTransport() const; +}; + +} // namespace iceberg::hive diff --git a/src/iceberg/iceberg-config.cmake.in b/src/iceberg/iceberg-config.cmake.in index dfb0e1dbc..9a7bfbea4 100644 --- a/src/iceberg/iceberg-config.cmake.in +++ b/src/iceberg/iceberg-config.cmake.in @@ -30,6 +30,8 @@ # iceberg::iceberg_bundle_static # iceberg::iceberg_rest_shared # iceberg::iceberg_rest_static +# iceberg::iceberg_hive_shared +# iceberg::iceberg_hive_static # iceberg::iceberg_catalog_sql_shared # iceberg::iceberg_catalog_sql_static