A tool for streaming time-series data from a BigQuery table to Pub/Sub.
- BigQuery Admin
- Storage Admin
- Pub/Sub Publisher
# --- Project and credential setup -----------------------------------------
# Point gcloud at the target project (replace "project-name" with yours),
# then capture the active project id for use in resource names below.
gcloud config set project project-name
project=$(gcloud config get-value project 2> /dev/null)

# Service-account key file; the containers below read it via
# GOOGLE_APPLICATION_CREDENTIALS. "$PWD" is quoted so paths containing
# spaces do not break dirname.
credentials=credentials.json
credentials_path=$(dirname "$PWD/$credentials")

# Pub/Sub topic that replayed rows are published to.
topic=bigquery-to-pubsub-test0

# Create the temporary BigQuery dataset / GCS bucket and capture the shared
# name generated for them.
bash create_temp_resources.sh
temp_resource_name=$(./get_temp_resource_name.sh)

# Build the replay tool image.
docker build -t bigquery-to-pubsub:latest -f Dockerfile .
echo "Replaying Ethereum transactions"
# Replay one hour of public Ethereum transactions straight from the source
# table. The credentials directory is mounted into the container and the
# Google SDK is pointed at the key file; both expansions are quoted so
# paths with spaces survive word-splitting.
docker run \
  -v "$credentials_path":/bigquery-to-pubsub/ --env GOOGLE_APPLICATION_CREDENTIALS="/bigquery-to-pubsub/$credentials" \
  bigquery-to-pubsub:latest \
  --timestamp-field block_timestamp \
  --start-timestamp 2019-10-23T00:00:00 \
  --end-timestamp 2019-10-23T01:00:00 \
  --batch-size-in-seconds 1800 \
  --replay-rate 0.1 \
  --pubsub-topic "projects/${project}/topics/${topic}" \
  --temp-bigquery-dataset "${temp_resource_name}" \
  --temp-bucket "${temp_resource_name}" \
  --bigquery-table bigquery-public-data.crypto_ethereum.transactions
# Load the example query, flattening it onto a single line so it can be
# passed safely as one CLI argument. (The previous read of
# example_query_2.txt was a dead assignment immediately overwritten by the
# .sql read, so only the .sql file is used; perl reads the file directly
# instead of piping through cat.)
query=$(perl -ne 'chomp;print qq($_ )' example_query_2.sql)
echo "Replaying Ethereum transactions"
# Replay using a custom SQL query (--query) instead of a whole table, at a
# faster replay rate. Expansions are quoted so paths and the query string
# are each passed as a single argument.
docker run \
  -v "$credentials_path":/bigquery-to-pubsub/ --env GOOGLE_APPLICATION_CREDENTIALS="/bigquery-to-pubsub/$credentials" \
  bigquery-to-pubsub:latest \
  --timestamp-field block_timestamp \
  --start-timestamp 2019-10-23T00:00:00 \
  --end-timestamp 2019-10-23T01:00:00 \
  --batch-size-in-seconds 1800 \
  --replay-rate 2 \
  --pubsub-topic "projects/${project}/topics/${topic}" \
  --temp-bigquery-dataset "${temp_resource_name}" \
  --temp-bucket "${temp_resource_name}" \
  --query "$query"
Thanks to Merkle Science for donating some exchange wallet labels as CSV here: https://github.com/merklescience/ethereum-exchange-addresses