Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 145 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,84 @@ executors:
working_directory: /chronon
docker:
- image: houpy0829/chronon-ci:base--f87f50dc520f7a73894ae024eb78bd305d5b08e2
# Executor used by the "Mill -- Compile All" and "Mill -- Python API Build" jobs.
modern_ubuntu_executor:
  docker:
    - image: "cimg/openjdk:11.0"
  working_directory: /chronon
  resource_class: xlarge
# Same image and working directory on a larger resource class; used by "Mill -- Tests".
modern_ubuntu_executor_xxlarge:
  docker:
    - image: "cimg/openjdk:11.0"
  working_directory: /chronon
  resource_class: 2xlarge

commands:
  # Reusable step list that provisions the toolchain the Mill jobs rely on:
  # apt build packages, the Thrift compiler (built from source), SBT, a conda
  # environment named chronon_py, and the Mill launcher.
  install_build_dependencies:
    description: "Install Thrift, Conda, SBT, and Mill on Ubuntu 22.04"
    steps:
      - run:
          name: Install system dependencies
          command: |
            sudo apt-get update
            sudo apt-get install -y \
              automake \
              bison \
              cmake \
              flex \
              g++ \
              git \
              libboost-dev \
              libboost-filesystem-dev \
              libboost-program-options-dev \
              libboost-system-dev \
              libboost-test-dev \
              libevent-dev \
              libssl-dev \
              libtool \
              make \
              pkg-config
      - run:
          name: Install Thrift 0.11.0 from source
          command: |
            export THRIFT_VERSION=0.11.0
            # Fetch over HTTPS and fail on HTTP errors (-f) so an error page is
            # never handed to tar as if it were the release tarball.
            curl -fsSL "https://archive.apache.org/dist/thrift/$THRIFT_VERSION/thrift-$THRIFT_VERSION.tar.gz" -o thrift.tar.gz
            mkdir -p /tmp/thrift
            tar zxf thrift.tar.gz -C /tmp/thrift --strip-components=1
            rm thrift.tar.gz
            cd /tmp/thrift
            ./configure --without-python --without-cpp
            make
            sudo make install
            cd /
            rm -rf /tmp/thrift
            # Sanity check that the compiler is on PATH.
            thrift --version
      - run:
          name: Install SBT
          command: |
            echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee /etc/apt/sources.list.d/sbt.list
            curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823" | sudo apt-key add
            sudo apt-get update
            sudo apt-get install -y sbt
      - run:
          name: Install Miniconda
          command: |
            wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
            bash ~/miniconda.sh -b -p $HOME/miniconda
            echo 'export PATH=$HOME/miniconda/bin:$PATH' >> $BASH_ENV
            # `conda activate` is a shell function defined by conda.sh; having the
            # conda binary on PATH is not enough. Later steps source $BASH_ENV and
            # then call `conda activate chronon_py`, so register the hook here.
            echo 'source $HOME/miniconda/etc/profile.d/conda.sh' >> $BASH_ENV
            source $BASH_ENV
            conda create -y -n chronon_py python=3.7
            conda install -y -q -n chronon_py --no-deps virtualenv
            # The nose specifier is quoted: unquoted, the shell parses `>` as an
            # output redirection and pip only ever sees the bare name `nose`.
            $HOME/miniconda/envs/chronon_py/bin/pip install \
              flake8==5.0.4 flake8-quotes==3.3.1 thrift==0.11.0 click==7.0 thrift_json==0.1.0 'nose>=1.3.7'
            $HOME/miniconda/envs/chronon_py/bin/pip install build
      - run:
          name: Install Mill with checksum verification
          command: |
            # NOTE(review): the package.mill build files use mvnDeps / Task.Command,
            # which look like a newer Mill API than 0.10.15 - confirm that the pinned
            # version (and its checksum) match what the build definition requires.
            curl -L https://github.com/com-lihaoyi/mill/releases/download/0.10.15/0.10.15 -o mill
            echo "d90132b1a4ebe4d55d2bc43b3f18b5d6e8e3d12d89a83f83ad2276867e127916 mill" | sha256sum -c -
            chmod +x mill
            sudo mv mill /usr/local/bin/mill
            mill --version

jobs:
"Pull Docker Image":
Expand Down Expand Up @@ -195,6 +273,64 @@ jobs:
destination: spark_warehouse.tar.gz
when: on_fail

# Compiles the api, aggregator, online, spark and flink modules with Mill.
"Mill -- Compile All":
  executor: modern_ubuntu_executor
  steps:
    - checkout
    - install_build_dependencies
    - run:
        name: "Compile all modules with Mill"
        shell: /bin/bash -leuxo pipefail
        command: |
          source $BASH_ENV
          conda activate chronon_py
          # Prepare scala version-specific sources for Mill
          mill api.prepareScalaSources
          # Compile all modules
          mill api.compile aggregator.compile online.compile spark.compile flink.compile

# Runs the Mill test targets of every module; on failure the spark-warehouse
# directory is archived and uploaded as a build artifact.
"Mill -- Tests":
  executor: modern_ubuntu_executor_xxlarge
  steps:
    - checkout
    - install_build_dependencies
    - run:
        name: "Run all tests with Mill"
        shell: /bin/bash -leuxo pipefail
        command: |
          source $BASH_ENV
          conda activate chronon_py
          mill api.prepareScalaSources
          mill api.test aggregator.test online.test spark.test flink.test
    # The two steps below only run when an earlier step failed.
    - run:
        name: "Compress spark-warehouse"
        when: on_fail
        command: |
          cd /tmp/ && tar -czvf spark-warehouse.tar.gz chronon/spark-warehouse
    - store_artifacts:
        when: on_fail
        path: /tmp/spark-warehouse.tar.gz
        destination: spark_warehouse.tar.gz

# Generates the Python Thrift bindings and builds the Python API with Mill,
# then uploads the contents of api/py/dist as build artifacts.
"Mill -- Python API Build":
  executor: modern_ubuntu_executor
  steps:
    - checkout
    - install_build_dependencies
    - run:
        name: "Build Python API with Mill"
        shell: /bin/bash -leuxo pipefail
        command: |
          source $BASH_ENV
          conda activate chronon_py
          # Set project root for Mill commands
          export CHRONON_ROOT=/chronon
          # Generate Python Thrift and build wheel
          mill generatePythonThrift
          mill buildPythonApi
    - store_artifacts:
        path: /chronon/api/py/dist

workflows:
build_test_deploy:
jobs:
Expand All @@ -221,5 +357,14 @@ workflows:
requires:
- "Pull Docker Image"
- "Scala 13 -- Iceberg Table Utils Tests":
requires:
- "Pull Docker Image"
# Workflow entries for the three Mill jobs; each is gated on "Pull Docker Image".
- "Mill -- Compile All":
    requires:
      - "Pull Docker Image"
- "Mill -- Tests":
    requires:
      - "Pull Docker Image"
- "Mill -- Python API Build":
    requires:
      - "Pull Docker Image"
69 changes: 69 additions & 0 deletions aggregator/package.mill
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package build.aggregator

import mill._
import mill.javalib._
import mill.javalib.publish._
import mill.scalalib.SbtModule

/**
 * Mill build definition for the `aggregator` module.
 *
 * The module depends on `build.api` and is publishable (it mixes in
 * `PublishModule`, hence `pomSettings` and `publishVersion` below).
 */
object `package` extends SbtModule with PublishModule {

  def scalaVersion = "2.12.12"

  /** Regular library dependencies of the module. */
  def mvnDeps = Seq(
    mvn"com.google.code.gson:gson:2.8.6",
    mvn"com.yahoo.datasketches:sketches-core:0.13.4",
    mvn"org.apache.commons:commons-lang3:3.12.0"
  )

  def moduleDeps = super.moduleDeps ++ Seq(build.api)

  /** Spark artifacts are declared as compile-time dependencies only. */
  def compileMvnDeps = Seq(
    mvn"org.apache.spark::spark-core:3.1.1",
    mvn"org.apache.spark::spark-sql:3.1.1"
  )

  /** Metadata written into the published POM. */
  def pomSettings = PomSettings(
    description = "Chronon is a feature engineering platform",
    organization = "ai.chronon",
    url = "https://github.com/airbnb/chronon",
    licenses = Seq(
      License(
        "Apache 2",
        "Apache 2",
        "http://www.apache.org/licenses/LICENSE-2.0.txt",
        false,
        false,
        "repo"
      )
    ),
    versionControl = VersionControl(
      Some("https://github.com/airbnb/chronon"),
      // SCM connection string; the host part had been mangled into an
      // e-mail-obfuscation placeholder and is restored here.
      Some("scm:git@github.com:airbnb/chronon.git"),
      None,
      None
    ),
    developers = Seq(
      Developer(
        "nikhilsimha",
        "Nikhil Simha",
        "http://nikhilsimha.com",
        None,
        None
      )
    )
  )

  def publishVersion = "awhittier-mill-0.0.110-SNAPSHOT"

  /** JUnit 4 based test module; it also depends on the api module's tests. */
  object test extends SbtTests with TestModule.Junit4 {

    def mvnDeps = Seq(
      mvn"junit:junit:4.13.2",
      mvn"com.novocode:junit-interface:0.11",
      mvn"org.scalatest::scalatest:3.2.15",
      mvn"org.apache.commons:commons-math3:3.6.1"
    )

    def moduleDeps = super.moduleDeps ++ Seq(build.api.test)

    // Both the sandboxed test working directory and test parallelism are switched off.
    def testSandboxWorkingDir = false
    def testParallelism = false

  }
}
105 changes: 105 additions & 0 deletions api/package.mill
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package build.api

import mill._
import mill.javalib._
import mill.javalib.publish._
import mill.scalalib.SbtModule

/**
 * Mill build definition for the `api` module.
 *
 * Besides the usual dependency and publishing settings it provides
 * `prepareScalaSources`, a command that copies the Scala 2.12 specific
 * sources into the main source directory, and overrides `generatedSources`
 * to produce Java sources from `thrift/api.thrift`.
 */
object `package` extends SbtModule with PublishModule {

  def scalaVersion = "2.12.12"

  /** Regular library dependencies of the module. */
  def mvnDeps = Seq(
    mvn"org.apache.thrift:libthrift:0.13.0",
    mvn"org.scala-lang.modules::scala-collection-compat:2.6.0",
    mvn"org.scala-lang:scala-reflect:2.12.12"
  )

  /** Spark artifacts are declared as compile-time dependencies only. */
  def compileMvnDeps = Seq(
    mvn"org.apache.spark::spark-core:3.1.1",
    mvn"org.apache.spark::spark-sql:3.1.1"
  )

  /** Metadata written into the published POM. */
  def pomSettings = PomSettings(
    description = "Chronon is a feature engineering platform",
    organization = "ai.chronon",
    url = "https://github.com/airbnb/chronon",
    licenses = Seq(
      License(
        "Apache 2",
        "Apache 2",
        "http://www.apache.org/licenses/LICENSE-2.0.txt",
        false,
        false,
        "repo"
      )
    ),
    versionControl = VersionControl(
      Some("https://github.com/airbnb/chronon"),
      // SCM connection string; the host part had been mangled into an
      // e-mail-obfuscation placeholder and is restored here.
      Some("scm:git@github.com:airbnb/chronon.git"),
      None,
      None
    ),
    developers = Seq(
      Developer(
        "nikhilsimha",
        "Nikhil Simha",
        "http://nikhilsimha.com",
        None,
        None
      )
    )
  )

  def publishVersion = "awhittier-mill-0.0.110-SNAPSHOT"

  /**
   * Prepare Scala version-specific sources for Mill compilation.
   *
   * Mill's security model prevents reading from scala-2.12/ during Task execution,
   * so we copy version-specific sources to the main scala/ directory before compilation.
   *
   * This is a one-time setup command - run it once per clean build:
   *   ./mill api.prepareScalaSources
   *
   * SBT users don't need this - sbt automatically picks up scala-2.12/ directories.
   */
  def prepareScalaSources() = Task.Command {
    val scala212Dir = millSourcePath0 / "src" / "main" / "scala-2.12"
    val scalaDir = millSourcePath0 / "src" / "main" / "scala"

    val versionSpecificSources = os.walk(scala212Dir).filter(_.ext == "scala")
    for (sourceFile <- versionSpecificSources) {
      // Mirror the path below scala-2.12/ underneath scala/.
      val targetFile = scalaDir / sourceFile.relativeTo(scala212Dir)
      // createFolders = true creates missing parent directories, so no
      // separate makeDir call is needed.
      os.copy.over(sourceFile, targetFile, createFolders = true)
      println(s"✓ Copied: ${sourceFile.last}")
    }
    println(s"✓ Scala 2.12 sources prepared for Mill compilation")
    ()
  }

  /**
   * Generate Java sources from Thrift by invoking the `thrift` executable,
   * which therefore has to be available on PATH.
   *
   * NOTE(review): the thrift file is not declared as a tracked input of this
   * task, so edits to api.thrift may not invalidate the cached result - confirm.
   */
  def generatedSources = Task {
    val thriftFile = millSourcePath0 / "thrift" / "api.thrift"
    val outDir = Task.dest / "java"
    // Start from an empty output directory so stale generated files never linger.
    os.remove.all(outDir)
    os.makeDir.all(outDir)
    os.proc("thrift", "--gen", "java", "-out", outDir, thriftFile)
      .call(stdout = os.Inherit)

    os.walk(outDir).filter(_.ext == "java").map(PathRef(_))
  }

  /** JUnit 4 based test module. */
  object test extends SbtTests with TestModule.Junit4 {

    def mvnDeps = Seq(
      mvn"com.novocode:junit-interface:0.11",
      mvn"junit:junit:4.13.2",
      mvn"org.scalatest::scalatest:3.2.15",
      mvn"org.scalatestplus::mockito-3-4:3.2.10.0"
    )

    // Both the sandboxed test working directory and test parallelism are switched off.
    def testSandboxWorkingDir = false
    def testParallelism = false

  }
}
17 changes: 17 additions & 0 deletions api/py/python-api-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,23 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# Start from a clean dist/ directory. SCRIPT_DIR is quoted so that a checkout
# path containing spaces does not split into several rm arguments.
echo "Removing old distributions..."
rm -f "$SCRIPT_DIR"/dist/*

# Create the virtual environment two levels above this script if it is missing.
VENV_DIR="$SCRIPT_DIR/../../venv"
if [[ ! -d "$VENV_DIR" ]]; then
    echo "Creating Python virtual environment..."
    python3 -m venv "$VENV_DIR"
fi

# Activate venv
echo "Activating Python virtual environment..."
source "$VENV_DIR/bin/activate"

# Install the build tooling unless BOTH packages already import. Probing only
# `build` would skip installing twine whenever build happened to be present.
if ! python3 -c "import build, twine" 2>/dev/null; then
    echo "Installing Python build dependencies..."
    # `python3 -m pip` installs into the same interpreter that was just probed.
    python3 -m pip install --quiet build twine
fi

# The default action is "build"
if [[ -z "${ACTION}" ]] || [[ "${ACTION}" == "build" ]]; then
PYPI_REPOSITORY="internal"
Expand Down
Loading