Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,13 @@ option(ICEBERG_BUILD_HIVE "Build hive (HMS) catalog client" OFF)
option(ICEBERG_BUILD_SQL_CATALOG "Build SQL catalog client" OFF)
# Built-in SQL catalog database connectors. Disable all of them to build a SQL
# catalog that only works with a user-supplied CatalogStore.
option(ICEBERG_SQL_SQLITE "Build the SQLite connector for the SQL catalog" OFF)
option(ICEBERG_SQL_POSTGRESQL "Build the PostgreSQL connector for the SQL catalog" OFF)
option(ICEBERG_SQL_MYSQL "Build the MySQL connector for the SQL catalog" OFF)
option(ICEBERG_SQL_SQLITE "Build SQLite connector for SQL catalog" OFF)
option(ICEBERG_SQL_POSTGRESQL "Build PostgreSQL connector for SQL catalog" OFF)
option(ICEBERG_SQL_MYSQL "Build MySQL connector for SQL catalog" OFF)
# Optional AWS integrations; both are disabled unless requested.
option(ICEBERG_S3 "Build with S3 support" OFF)
option(ICEBERG_SIGV4 "Build with SigV4 support" OFF)
# Bundle-vs-system switches. These only record the user's intent;
# cmake_modules/IcebergThirdpartyToolchain.cmake resolves them into
# ICEBERG_AWSSDK_BUNDLED / ICEBERG_THRIFT_BUNDLED. ICEBERG_BUNDLE_THRIFT is only
# consulted when ICEBERG_BUILD_HIVE is ON, and then requires ICEBERG_BUILD_BUNDLE.
option(ICEBERG_BUNDLE_AWSSDK "Bundle AWS SDK for S3/SigV4 support" ON)
option(ICEBERG_BUNDLE_THRIFT "Bundle Thrift (from Arrow) for Hive catalog" ON)
# Sanitizer instrumentation; off by default.
option(ICEBERG_ENABLE_ASAN "Enable Address Sanitizer" OFF)
option(ICEBERG_ENABLE_UBSAN "Enable Undefined Behavior Sanitizer" OFF)

Expand Down
136 changes: 136 additions & 0 deletions cmake_modules/FindThriftAlt.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# FindThriftAlt.cmake - locate an installed Apache Thrift C++ runtime.
#
# Named "ThriftAlt" rather than "Thrift" (following Arrow's FindThriftAlt.cmake)
# so it does not collide with a downstream project's own FindThrift.cmake, and
# so this module can itself call find_package(Thrift CONFIG) to reuse an
# upstream ThriftConfig.cmake without recursing into itself.
#
# Discovery order:
# 1. CONFIG mode (ThriftConfig.cmake), shipped by CMake-based Thrift installs
# (>= 0.13). When present it already provides a usable thrift::thrift.
# 2. pkg-config (thrift.pc), used by autotools installs and Homebrew.
# 3. A plain library / header search as a last resort (e.g. when neither a
# CMake config nor pkg-config is available).
#
# This module defines:
# ThriftAlt_FOUND - whether the Thrift C++ runtime was found
# ThriftAlt_VERSION - the detected Thrift version, if known
# thrift::thrift - imported target for the Thrift C++ runtime

# Short-circuit: if a previous invocation already succeeded, or some other part
# of the build has already defined thrift::thrift, report success and leave the
# module without searching again.
if(TARGET thrift::thrift OR ThriftAlt_FOUND)
  set(ThriftAlt_FOUND TRUE)
  return()
endif()

# ----------------------------------------------------------------------
# 1. CONFIG mode: prefer an upstream ThriftConfig.cmake when one is installed.
#
# Because this file is called FindThriftAlt.cmake (and not FindThrift.cmake),
# asking for package "Thrift" in CONFIG mode can only resolve to the upstream
# package configuration; it can never re-enter this module.

# Forward the caller's requested version (if any) to the upstream lookup.
if(ThriftAlt_FIND_VERSION)
  find_package(Thrift CONFIG QUIET ${ThriftAlt_FIND_VERSION})
else()
  find_package(Thrift CONFIG QUIET)
endif()

# Only accept the result when the config actually exported thrift::thrift;
# otherwise fall through to the pkg-config / manual search.
if(TARGET thrift::thrift)
  if(Thrift_FOUND)
    set(ThriftAlt_VERSION "${Thrift_VERSION}")
    set(ThriftAlt_FOUND TRUE)
  endif()
endif()

# ----------------------------------------------------------------------
# 2 + 3. pkg-config first, then an ordinary library / header search.
#
# pkg-config results are used purely as HINTS, so the find_* calls below still
# work (using CMake's default search paths) when thrift.pc is unavailable.

if(NOT ThriftAlt_FOUND)
  find_package(PkgConfig QUIET)
  if(PkgConfig_FOUND)
    pkg_check_modules(THRIFT_PC QUIET thrift)
  endif()

  # Headers: the directory that contains thrift/Thrift.h.
  find_path(ThriftAlt_INCLUDE_DIR
            NAMES thrift/Thrift.h
            HINTS ${THRIFT_PC_INCLUDEDIR} ${THRIFT_PC_INCLUDE_DIRS}
            PATH_SUFFIXES "include")

  # Library: the Thrift C++ runtime itself.
  find_library(ThriftAlt_LIB
               NAMES thrift libthrift
               HINTS ${THRIFT_PC_LIBDIR} ${THRIFT_PC_LIBRARY_DIRS}
               PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib")

  # Version: trust pkg-config when it reported one; otherwise scrape
  # PACKAGE_VERSION out of the installed thrift/config.h, if that file exists.
  if(THRIFT_PC_VERSION)
    set(ThriftAlt_VERSION "${THRIFT_PC_VERSION}")
  elseif(ThriftAlt_INCLUDE_DIR AND EXISTS "${ThriftAlt_INCLUDE_DIR}/thrift/config.h")
    file(READ "${ThriftAlt_INCLUDE_DIR}/thrift/config.h" _thriftalt_config_h_text)
    string(REGEX MATCH "#define PACKAGE_VERSION \"([0-9.]+)\"" _thriftalt_version_match
                 "${_thriftalt_config_h_text}")
    # CMAKE_MATCH_1 is empty when the define is absent, leaving the version unknown.
    set(ThriftAlt_VERSION "${CMAKE_MATCH_1}")
  endif()
endif()

# Let FindPackageHandleStandardArgs set ThriftAlt_FOUND, honour REQUIRED / QUIET
# and validate the requested version against ThriftAlt_VERSION.
include(FindPackageHandleStandardArgs)
if(TARGET thrift::thrift)
  # CONFIG mode already produced the target. REQUIRED_VARS expects *variable
  # names* and tests each one with if(NOT <var>); passing the target name
  # `thrift::thrift` directly would test an undefined variable and therefore
  # always report the package as missing. Route the target name through a
  # variable so the check succeeds and the "Found ThriftAlt" message shows it.
  set(_thriftalt_target thrift::thrift)
  find_package_handle_standard_args(
    ThriftAlt
    REQUIRED_VARS _thriftalt_target
    VERSION_VAR ThriftAlt_VERSION)
  unset(_thriftalt_target)
else()
  # Manual search: both the library and the header directory must be present.
  find_package_handle_standard_args(
    ThriftAlt
    REQUIRED_VARS ThriftAlt_LIB ThriftAlt_INCLUDE_DIR
    VERSION_VAR ThriftAlt_VERSION)
endif()

# When the manual search succeeded (no upstream config target exists), wrap the
# located library and header directory in an imported thrift::thrift target.
if(ThriftAlt_FOUND AND NOT TARGET thrift::thrift)
  add_library(thrift::thrift UNKNOWN IMPORTED)
  set_property(TARGET thrift::thrift PROPERTY IMPORTED_LOCATION "${ThriftAlt_LIB}")
  set_property(TARGET thrift::thrift PROPERTY INTERFACE_INCLUDE_DIRECTORIES
                                              "${ThriftAlt_INCLUDE_DIR}")
  if(WIN32)
    # On Windows, consumers additionally link the Winsock library.
    set_target_properties(thrift::thrift PROPERTIES INTERFACE_LINK_LIBRARIES "ws2_32")
  endif()
endif()

# ----------------------------------------------------------------------
# Boost headers.
#
# Thrift's public C++ headers pull in <boost/numeric/conversion/cast.hpp>, yet
# neither thrift.pc nor (older) ThriftConfig.cmake declares a Boost dependency.
# Attach the Boost headers to thrift::thrift here so that anything linking the
# target can compile the generated bindings.
#
# Boost is looked up in CONFIG mode on purpose: the legacy FindBoost module was
# removed and triggers a CMP0167 warning under
# cmake_minimum_required(VERSION < 3.30), which would otherwise show up in every
# downstream find_dependency(ThriftAlt).

if(ThriftAlt_FOUND)
  find_package(Boost CONFIG QUIET)

  # Neither an imported target nor an include path: Boost is unusable.
  if(NOT TARGET Boost::headers AND NOT Boost_INCLUDE_DIRS)
    message(FATAL_ERROR "Apache Thrift's C++ headers require Boost headers, but Boost was not "
                        "found. Install Boost development headers (e.g. 'brew install boost' "
                        "or 'apt install libboost-dev').")
  endif()

  if(TARGET Boost::headers)
    # Preferred: link the imported header-only target.
    target_link_libraries(thrift::thrift INTERFACE Boost::headers)
  else()
    # Fallback: only an include path was reported; add it directly.
    set_property(TARGET thrift::thrift APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES
                                                       "${Boost_INCLUDE_DIRS}")
  endif()
endif()

mark_as_advanced(ThriftAlt_LIB ThriftAlt_INCLUDE_DIR)
47 changes: 47 additions & 0 deletions cmake_modules/IcebergThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,17 @@ if(ICEBERG_S3 AND ICEBERG_BUNDLE_AWSSDK)
set(ICEBERG_AWSSDK_BUNDLED TRUE)
endif()

# Thrift (needed by the Hive catalog) follows the same bundle/system policy as
# the AWS SDK: ICEBERG_BUNDLE_THRIFT expresses what the user asked for, while
# ICEBERG_THRIFT_BUNDLED is the resolved decision the rest of the build reads.
# Bundling is only meaningful when the Hive catalog is built, and it requires
# ICEBERG_BUILD_BUNDLE to be enabled.
if(ICEBERG_BUILD_HIVE AND ICEBERG_BUNDLE_THRIFT)
  if(ICEBERG_BUILD_BUNDLE)
    set(ICEBERG_THRIFT_BUNDLED TRUE)
  else()
    message(FATAL_ERROR "ICEBERG_BUNDLE_THRIFT requires ICEBERG_BUILD_BUNDLE to be ON")
  endif()
else()
  set(ICEBERG_THRIFT_BUNDLED FALSE)
endif()

set(ICEBERG_AWSSDK_COMPONENTS)
if(NOT ICEBERG_AWSSDK_BUNDLED)
if(ICEBERG_S3)
Expand Down Expand Up @@ -742,3 +753,39 @@ endif()
if(ICEBERG_BUILD_SQL_CATALOG)
resolve_sql_catalog_dependencies()
endif()

# ----------------------------------------------------------------------
# Thrift (Hive catalog)
#
# resolve_thrift_dependency()
#   Makes a `thrift::thrift` target available for iceberg_hive's generated Hive
#   Metastore bindings. Takes no arguments and does nothing unless
#   ICEBERG_BUILD_HIVE is ON.
#
#   * ICEBERG_THRIFT_BUNDLED: wraps the `thrift` target produced by Arrow's
#     bundled build. This must therefore run after resolve_arrow_dependency().
#   * otherwise: locates a system Thrift through cmake_modules/FindThriftAlt.cmake
#     and appends ThriftAlt to the caller's ICEBERG_SYSTEM_DEPENDENCIES.

function(resolve_thrift_dependency)
  if(NOT ICEBERG_BUILD_HIVE)
    return()
  endif()

  if(NOT ICEBERG_THRIFT_BUNDLED)
    # System Thrift: FindThriftAlt.cmake (MODULE mode) supplies the
    # `thrift::thrift` target iceberg_hive expects. Recording ThriftAlt as a
    # system dependency lets a downstream find_package(Iceberg) look it up again.
    find_package(ThriftAlt MODULE REQUIRED GLOBAL)
    set(ICEBERG_SYSTEM_DEPENDENCIES
        ${ICEBERG_SYSTEM_DEPENDENCIES} ThriftAlt
        PARENT_SCOPE)
    return()
  endif()

  # Bundled Thrift: Arrow's bundled build creates the Thrift C++ runtime as a
  # `thrift` target scoped to its FetchContent directory, where iceberg_hive
  # cannot see it. Nothing to do if that target is absent or if
  # `thrift::thrift` has already been defined.
  if(TARGET thrift::thrift OR NOT TARGET thrift)
    return()
  endif()

  # Expose it globally as an INTERFACE IMPORTED `thrift::thrift` target that
  # simply forwards to `thrift`.
  add_library(thrift::thrift INTERFACE IMPORTED GLOBAL)
  target_link_libraries(thrift::thrift INTERFACE thrift)
endfunction()

if(ICEBERG_BUILD_HIVE)
  resolve_thrift_dependency()
endif()
8 changes: 8 additions & 0 deletions src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,14 @@ endif()

iceberg_install_cmake_package(iceberg iceberg_targets)

# A build against a system Thrift makes downstream find_package(Iceberg) call
# find_dependency(ThriftAlt). Install FindThriftAlt.cmake alongside the package
# config (the directory iceberg-config.cmake adds to CMAKE_MODULE_PATH) so that
# lookup can be resolved. Not needed when Thrift is bundled.
if(ICEBERG_BUILD_HIVE)
  if(NOT ICEBERG_THRIFT_BUNDLED)
    install(FILES "${PROJECT_SOURCE_DIR}/cmake_modules/FindThriftAlt.cmake"
            DESTINATION "${ICEBERG_INSTALL_CMAKEDIR}/iceberg")
  endif()
endif()

if(ICEBERG_BUILD_TESTS)
add_subdirectory(test)
endif()
91 changes: 86 additions & 5 deletions src/iceberg/catalog/hive/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,92 @@
# specific language governing permissions and limitations
# under the License.

# Skeleton for the iceberg_hive library target.
# The iceberg_hive library: a Hive Metastore (HMS) catalog client built on
# generated Apache Thrift bindings. Layout mirrors iceberg_rest.

# ----------------------------------------------------------------------
# Hive Metastore Thrift bindings.
#
# Sources, dependency wiring and the actual `iceberg_hive` library target
# are introduced in follow-up commits. For now this file installs only the
# public export header so that the directory is wired into the build system
# end-to-end.
# These are checked into gen-cpp/ rather than generated at build time, so a
# normal build needs no Thrift IDL compiler — only the Thrift C++ runtime,
# which comes either from Apache Arrow's bundled build (ICEBERG_BUNDLE_THRIFT=ON,
# the default) or from a system Thrift install (ICEBERG_BUNDLE_THRIFT=OFF).
# Regenerate them with dev/update_hive_thrift.sh whenever
# thirdparty/hive_metastore/*.thrift changes.

# Directory holding the checked-in, Thrift-generated C++ sources.
set(_thrift_gen_dir ${CMAKE_CURRENT_SOURCE_DIR}/gen-cpp)

# Generated translation units, listed by file name and then prefixed with the
# gen-cpp directory to obtain absolute paths.
set(ICEBERG_HIVE_THRIFT_GEN_SOURCES
    FacebookService.cpp
    fb303_types.cpp
    hive_metastore_constants.cpp
    hive_metastore_types.cpp
    ThriftHiveMetastore.cpp)
list(TRANSFORM ICEBERG_HIVE_THRIFT_GEN_SOURCES PREPEND "${_thrift_gen_dir}/")

# This is upstream-generated code, so exclude it from lint/format checks.
set_property(SOURCE ${ICEBERG_HIVE_THRIFT_GEN_SOURCES} PROPERTY SKIP_LINTING TRUE)

# Arrow's bundled Thrift 0.22 headers still use the deprecated std::iterator
# (removed upstream in Thrift 0.23, THRIFT-5698). Keep that diagnostic a warning
# for these files so it does not trip -Werror.
set_property(SOURCE ${ICEBERG_HIVE_THRIFT_GEN_SOURCES}
             PROPERTY COMPILE_OPTIONS
                      "$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:-Wno-error=deprecated-declarations>"
)

# ----------------------------------------------------------------------
# iceberg_hive library

# The generated bindings cannot be built without a Thrift runtime target; stop
# with an actionable message if none has been provided.
if(NOT TARGET thrift::thrift)
  message(FATAL_ERROR "iceberg_hive requires a `thrift::thrift` target. Either keep "
                      "ICEBERG_BUNDLE_THRIFT=ON with -DICEBERG_BUILD_BUNDLE=ON (the "
                      "default, uses Arrow's bundled Thrift), or build with "
                      "-DICEBERG_BUNDLE_THRIFT=OFF against a system Thrift install.")
endif()

# Hand-written catalog sources followed by the generated Thrift bindings.
set(ICEBERG_HIVE_SOURCES hive_catalog.cc hive_catalog_properties.cc)
list(APPEND ICEBERG_HIVE_SOURCES ${ICEBERG_HIVE_THRIFT_GEN_SOURCES})

# Build interface: thrift::thrift is always needed, because the generated
# bindings both include its headers and link against it. Each flavour links the
# matching iceberg core flavour, falling back to the other one when the
# matching flavour is not being built.
set(ICEBERG_HIVE_STATIC_BUILD_INTERFACE_LIBS
    "$<IF:$<TARGET_EXISTS:iceberg_static>,iceberg_static,iceberg_shared>" thrift::thrift)
set(ICEBERG_HIVE_SHARED_BUILD_INTERFACE_LIBS
    "$<IF:$<TARGET_EXISTS:iceberg_shared>,iceberg_shared,iceberg_static>" thrift::thrift)

# Install interface: the same iceberg core selection, using exported names.
set(ICEBERG_HIVE_STATIC_INSTALL_INTERFACE_LIBS
    "$<IF:$<TARGET_EXISTS:iceberg::iceberg_static>,iceberg::iceberg_static,iceberg::iceberg_shared>"
)
set(ICEBERG_HIVE_SHARED_INSTALL_INTERFACE_LIBS
    "$<IF:$<TARGET_EXISTS:iceberg::iceberg_shared>,iceberg::iceberg_shared,iceberg::iceberg_static>"
)

# Whether thrift::thrift belongs in the install interface depends on its origin:
#   * Bundled (from Arrow): the runtime is already baked into the installed
#     Arrow bundled-dependencies archive carried by iceberg core, and no
#     `thrift::thrift` target is exported downstream. Listing it would make a
#     downstream find_package(Iceberg) fail on the missing target, so omit it.
#   * System: a downstream find_package(Iceberg) re-creates `thrift::thrift`
#     through find_dependency(ThriftAlt) (FindThriftAlt.cmake is installed next
#     to the package config), so it has to be propagated.
if(NOT ICEBERG_THRIFT_BUNDLED)
  foreach(_hive_flavour IN ITEMS STATIC SHARED)
    list(APPEND ICEBERG_HIVE_${_hive_flavour}_INSTALL_INTERFACE_LIBS thrift::thrift)
  endforeach()
endif()

add_iceberg_lib(iceberg_hive
                SOURCES
                ${ICEBERG_HIVE_SOURCES}
                PRIVATE_INCLUDES
                ${_thrift_gen_dir}
                SHARED_LINK_LIBS
                ${ICEBERG_HIVE_SHARED_BUILD_INTERFACE_LIBS}
                STATIC_LINK_LIBS
                ${ICEBERG_HIVE_STATIC_BUILD_INTERFACE_LIBS}
                STATIC_INSTALL_INTERFACE_LIBS
                ${ICEBERG_HIVE_STATIC_INSTALL_INTERFACE_LIBS}
                SHARED_INSTALL_INTERFACE_LIBS
                ${ICEBERG_HIVE_SHARED_INSTALL_INTERFACE_LIBS})

# Install the public headers of this directory.
iceberg_install_all_headers(iceberg/catalog/hive)
Loading
Loading