From f009d4cce05c0b2e5ab84ac69bf546bc11953963 Mon Sep 17 00:00:00 2001 From: Saurabh Daga Date: Tue, 29 Oct 2019 17:19:31 +0530 Subject: [PATCH 01/51] fix --- .idea/.gitignore | 3 + .idea/bitcoin-etl.iml | 9 ++ .idea/codeStyles/codeStyleConfig.xml | 5 + .idea/misc.xml | 4 + .idea/modules.xml | 8 ++ .idea/vcs.xml | 6 + bitcoinetl/cli/export_all.py | 9 +- .../cli/export_blocks_and_transactions.py | 11 +- bitcoinetl/domain/block.py | 1 + bitcoinetl/domain/transaction.py | 3 + bitcoinetl/enumeration/chain.py | 20 +++ bitcoinetl/jobs/export_all.py | 9 +- bitcoinetl/jobs/export_blocks_job.py | 7 +- .../blocks_and_transactions_item_exporter.py | 6 +- bitcoinetl/mappers/block_mapper.py | 4 +- bitcoinetl/mappers/transaction_mapper.py | 2 + bitcoinetl/service/btc_service.py | 65 ++++++++- blockchainetl/cryptocompare.py | 125 ++++++++++++++++++ 18 files changed, 282 insertions(+), 15 deletions(-) create mode 100644 .idea/.gitignore create mode 100644 .idea/bitcoin-etl.iml create mode 100644 .idea/codeStyles/codeStyleConfig.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 blockchainetl/cryptocompare.py diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..0e40fe8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ + +# Default ignored files +/workspace.xml \ No newline at end of file diff --git a/.idea/bitcoin-etl.iml b/.idea/bitcoin-etl.iml new file mode 100644 index 0000000..d6ebd48 --- /dev/null +++ b/.idea/bitcoin-etl.iml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/.idea/codeStyles/codeStyleConfig.xml b/.idea/codeStyles/codeStyleConfig.xml new file mode 100644 index 0000000..a55e7a1 --- /dev/null +++ b/.idea/codeStyles/codeStyleConfig.xml @@ -0,0 +1,5 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..e588603 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..0b204cf --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/bitcoinetl/cli/export_all.py b/bitcoinetl/cli/export_all.py index e235f9a..2628e8a 100644 --- a/bitcoinetl/cli/export_all.py +++ b/bitcoinetl/cli/export_all.py @@ -25,7 +25,7 @@ import re from datetime import datetime, timedelta -from bitcoinetl.enumeration.chain import Chain +from bitcoinetl.enumeration.chain import Chain, CoinPriceType from bitcoinetl.jobs.export_all import export_all as do_export_all from bitcoinetl.service.btc_block_range_service import BtcBlockRangeService from bitcoinetl.rpc.bitcoin_rpc import BitcoinRpc @@ -96,7 +96,10 @@ def get_partitions(start, end, partition_batch_size, provider_uri): @click.option('-c', '--chain', default=Chain.BITCOIN, type=click.Choice(Chain.ALL), help='The type of chain.') @click.option('--enrich', default=False, type=bool, help='Enable filling in transactions inputs fields.') -def export_all(start, end, partition_batch_size, provider_uri, output_dir, max_workers, export_batch_size, chain, enrich): +@click.option('--coin-price-type', default=CoinPriceType.empty, type=int, + help='Enable querying CryptoCompare for coin prices. 0 for no price, 1 for daily price, 2 for hourly price.') +def export_all(start, end, partition_batch_size, provider_uri, output_dir, max_workers, export_batch_size, chain, enrich, coin_price_type): """Exports all data for a range of blocks.""" do_export_all(chain, get_partitions(start, end, partition_batch_size, provider_uri), - output_dir, provider_uri, max_workers, export_batch_size, enrich) + output_dir, provider_uri, max_workers, export_batch_size, enrich, + coin_price_type) diff --git a/bitcoinetl/cli/export_blocks_and_transactions.py b/bitcoinetl/cli/export_blocks_and_transactions.py index 1ab1a09..9d75e08 100644 --- a/bitcoinetl/cli/export_blocks_and_transactions.py +++ b/bitcoinetl/cli/export_blocks_and_transactions.py @@ -23,7 +23,7 @@ import click -from bitcoinetl.enumeration.chain import Chain +from bitcoinetl.enumeration.chain import Chain, CoinPriceType from bitcoinetl.jobs.export_blocks_job import ExportBlocksJob from bitcoinetl.jobs.exporters.blocks_and_transactions_item_exporter import blocks_and_transactions_item_exporter from bitcoinetl.rpc.bitcoin_rpc import BitcoinRpc @@ -48,8 +48,11 @@ 'If not provided transactions will not be exported. Use "-" for stdout') @click.option('-c', '--chain', default=Chain.BITCOIN, type=click.Choice(Chain.ALL), help='The type of chain') +@click.option('--coin-price-type', default=CoinPriceType.empty, type=int, + help='Enable querying CryptoCompare for coin prices. 0 for no price, 1 for daily price, 2 for hourly price.') def export_blocks_and_transactions(start_block, end_block, batch_size, provider_uri, - max_workers, blocks_output, transactions_output, chain): + max_workers, blocks_output, transactions_output, chain, + coin_price_type): """Export blocks and transactions.""" if blocks_output is None and transactions_output is None: raise ValueError('Either --blocks-output or --transactions-output options must be provided') @@ -63,5 +66,7 @@ def export_blocks_and_transactions(start_block, end_block, batch_size, provider_ item_exporter=blocks_and_transactions_item_exporter(blocks_output, transactions_output), chain=chain, export_blocks=blocks_output is not None, - export_transactions=transactions_output is not None) + export_transactions=transactions_output is not None, + coin_price_type=coin_price_type + ) job.run() diff --git a/bitcoinetl/domain/block.py b/bitcoinetl/domain/block.py index 97a8219..87019ed 100644 --- a/bitcoinetl/domain/block.py +++ b/bitcoinetl/domain/block.py @@ -38,6 +38,7 @@ def __init__(self): self.coinbase_param = None self.transactions = [] + self.coin_price_usd = None def has_full_transactions(self): return len(self.transactions) > 0 and isinstance(self.transactions[0], BtcTransaction) diff --git a/bitcoinetl/domain/transaction.py b/bitcoinetl/domain/transaction.py index ef56275..9ddb693 100644 --- a/bitcoinetl/domain/transaction.py +++ b/bitcoinetl/domain/transaction.py @@ -43,6 +43,9 @@ def __init__(self): self.join_splits = [] self.value_balance = 0 + # New fields + self.coin_price_usd = None + def add_input(self, input): if len(self.inputs) > 0: input.index = self.inputs[len(self.inputs) - 1].index + 1 diff --git a/bitcoinetl/enumeration/chain.py b/bitcoinetl/enumeration/chain.py index d54728b..f115d9b 100644 --- a/bitcoinetl/enumeration/chain.py +++ b/bitcoinetl/enumeration/chain.py @@ -10,3 +10,23 @@ class Chain: ALL = [BITCOIN, BITCOIN_CASH, DOGECOIN, LITECOIN, DASH, ZCASH, MONACOIN] # Old API doesn't support verbosity for getblock which doesn't allow querying all transactions in a block in 1 go. HAVE_OLD_API = [BITCOIN_CASH, DOGECOIN, DASH, MONACOIN] + + @classmethod + def ticker_symbol(cls, chain): + symbols = { + 'bitcoin': 'BTC', + 'bitcoin_cash': 'BCH', + 'dogecoin': 'DOGE', + 'litecoin': 'LTC', + 'dash': 'DASH', + 'zcash': 'ZEC', + 'monacoin': 'MONA', + } + return symbols.get(chain, None) + + +class CoinPriceType: + + empty = 0 + daily = 1 + hourly = 2 diff --git a/bitcoinetl/jobs/export_all.py b/bitcoinetl/jobs/export_all.py index 8a33591..e3a75d2 100644 --- a/bitcoinetl/jobs/export_all.py +++ b/bitcoinetl/jobs/export_all.py @@ -40,7 +40,10 @@ logger = logging.getLogger('export_all') -def export_all(chain, partitions, output_dir, provider_uri, max_workers, batch_size, enrich): +def export_all( + chain, partitions, output_dir, provider_uri, max_workers, batch_size, enrich, + coin_price_type + ): for batch_start_block, batch_end_block, partition_dir, *args in partitions: # # # start # # # @@ -101,7 +104,9 @@ def export_all(chain, partitions, output_dir, provider_uri, max_workers, batch_s max_workers=max_workers, item_exporter=blocks_and_transactions_item_exporter(blocks_file, transactions_file), export_blocks=blocks_file is not None, - export_transactions=transactions_file is not None) + export_transactions=transactions_file is not None, + coin_price_type=coin_price_type, + ) job.run() if enrich == True: diff --git a/bitcoinetl/jobs/export_blocks_job.py b/bitcoinetl/jobs/export_blocks_job.py index c4b384c..e19b2a1 100644 --- a/bitcoinetl/jobs/export_blocks_job.py +++ b/bitcoinetl/jobs/export_blocks_job.py @@ -27,6 +27,7 @@ from blockchainetl.executors.batch_work_executor import BatchWorkExecutor from blockchainetl.jobs.base_job import BaseJob from blockchainetl.utils import validate_range +from bitcoinetl.enumeration.chain import CoinPriceType # Exports blocks and transactions @@ -41,8 +42,10 @@ def __init__( item_exporter, chain, export_blocks=True, - export_transactions=True): + export_transactions=True, + coin_price_type=CoinPriceType.empty): validate_range(start_block, end_block) + self.start_block = start_block self.end_block = end_block @@ -54,7 +57,7 @@ def __init__( if not self.export_blocks and not self.export_transactions: raise ValueError('At least one of export_blocks or export_transactions must be True') - self.btc_service = BtcService(bitcoin_rpc, chain) + self.btc_service = BtcService(bitcoin_rpc, chain, coin_price_type) self.block_mapper = BtcBlockMapper() self.transaction_mapper = BtcTransactionMapper() diff --git a/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py b/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py index c1e8f0e..08b252d 100644 --- a/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py +++ b/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py @@ -35,7 +35,8 @@ 'nonce', 'bits', 'coinbase_param', - 'transaction_count' + 'transaction_count', + "coin_price_usd", ] TRANSACTION_FIELDS_TO_EXPORT = [ @@ -57,7 +58,8 @@ 'output_count', 'input_value', 'output_value', - 'fee' + 'fee', + 'coin_price_usd', ] diff --git a/bitcoinetl/mappers/block_mapper.py b/bitcoinetl/mappers/block_mapper.py index fce6095..fc0b8d1 100644 --- a/bitcoinetl/mappers/block_mapper.py +++ b/bitcoinetl/mappers/block_mapper.py @@ -58,6 +58,7 @@ def json_dict_to_block(self, json_dict): block.transaction_count = len(raw_transactions) + block.coin_price_usd = json_dict.get('coin_price_usd') return block def block_to_dict(self, block): @@ -74,7 +75,8 @@ def block_to_dict(self, block): 'nonce': block.nonce, 'bits': block.bits, 'coinbase_param': block.coinbase_param, - 'transaction_count': len(block.transactions) + 'transaction_count': len(block.transactions), + "coin_price_usd": block.coin_price_usd, } diff --git a/bitcoinetl/mappers/transaction_mapper.py b/bitcoinetl/mappers/transaction_mapper.py index 93f2f29..511b0cb 100644 --- a/bitcoinetl/mappers/transaction_mapper.py +++ b/bitcoinetl/mappers/transaction_mapper.py @@ -89,6 +89,7 @@ def transaction_to_dict(self, transaction): 'input_value': transaction.calculate_input_value(), 'output_value': transaction.calculate_output_value(), 'fee': transaction.calculate_fee(), + 'coin_price_usd': transaction.coin_price_usd, } return result @@ -104,6 +105,7 @@ def dict_to_transaction(self, dict): transaction.block_timestamp = dict.get('block_timestamp') transaction.is_coinbase = dict.get('is_coinbase') transaction.index = dict.get('index') + transaction.coin_price_usd = dict.get('coin_price_usd') transaction.inputs = self.transaction_input_mapper.dicts_to_inputs(dict.get('inputs')) transaction.outputs = self.transaction_output_mapper.dicts_to_outputs(dict.get('outputs')) diff --git a/bitcoinetl/service/btc_service.py b/bitcoinetl/service/btc_service.py index d768618..83ea034 100644 --- a/bitcoinetl/service/btc_service.py +++ b/bitcoinetl/service/btc_service.py @@ -22,7 +22,7 @@ from bitcoinetl.domain.transaction_input import BtcTransactionInput from bitcoinetl.domain.transaction_output import BtcTransactionOutput -from bitcoinetl.enumeration.chain import Chain +from bitcoinetl.enumeration.chain import Chain, CoinPriceType from bitcoinetl.json_rpc_requests import generate_get_block_hash_by_number_json_rpc, \ generate_get_block_by_hash_json_rpc, generate_get_transaction_by_id_json_rpc from bitcoinetl.mappers.block_mapper import BtcBlockMapper @@ -30,14 +30,22 @@ from bitcoinetl.service.btc_script_service import script_hex_to_non_standard_address from bitcoinetl.service.genesis_transactions import GENESIS_TRANSACTIONS from blockchainetl.utils import rpc_response_batch_to_results, dynamic_batch_iterator +from blockchainetl.cryptocompare import ( + get_coin_price, + get_hour_id_from_ts, + get_day_id_from_ts, + get_ts_from_hour_id, + get_ts_from_day_id +) class BtcService(object): - def __init__(self, bitcoin_rpc, chain=Chain.BITCOIN): + def __init__(self, bitcoin_rpc, chain=Chain.BITCOIN, coin_price_type=CoinPriceType.empty): self.bitcoin_rpc = bitcoin_rpc self.block_mapper = BtcBlockMapper() self.transaction_mapper = BtcTransactionMapper() self.chain = chain + self.coin_price_type = coin_price_type def get_block(self, block_number, with_transactions=False): block_hashes = self.get_block_hashes([block_number]) @@ -73,10 +81,14 @@ def get_blocks_by_hashes(self, block_hash_batch, with_transactions=True): if self.chain in Chain.HAVE_OLD_API and with_transactions: self._fetch_transactions(blocks) + self._add_coin_price_to_blocks(blocks, self.coin_price_type) + for block in blocks: self._remove_coinbase_input(block) + if block.has_full_transactions(): for transaction in block.transactions: + self._add_coin_price_to_transaction(transaction, block.coin_price_usd) self._add_non_standard_addresses(transaction) if self.chain == Chain.ZCASH: self._add_shielded_inputs_and_outputs(transaction) @@ -186,5 +198,54 @@ def _add_shielded_inputs_and_outputs(self, transaction): output.value = -transaction.value_balance transaction.add_output(output) + def non_coinbase_txs(self, block): + return [transaction + for transaction in block.transactions + if not transaction.transaction_id != block.coinbase_tx + ] + + def get_transaction_ids(self, block): + return [tx.transaction_id for tx in block.transactions] + + def get_block_reward(self, block): + return block.coinbase_tx.calculate_output_value() + + def get_input_value(self, block): + non_coinbase_txs = self.non_coinbase_txs(block) + return sum([tx.calculate_input_value() for tx in non_coinbase_txs]) + + def _add_coin_price_to_blocks(self, blocks, coin_price_type): + from_currency_code = Chain.ticker_symbol(self.chain) + + if not from_currency_code or coin_price_type == CoinPriceType.empty: + return + + elif coin_price_type == CoinPriceType.hourly: + block_hour_ids = list(set([get_hour_id_from_ts(block.timestamp) for block in blocks])) + block_hours_ts = {hour_id: get_ts_from_hour_id(hour_id) for hour_id in block_hour_ids} + coin_price_hours = { + hour_id: get_coin_price(from_currency_code=from_currency_code, timestamp=hour_ts, resource="histohour") + for hour_id, hour_ts in block_hours_ts.items() + } + + for block in blocks: + block_hour_id = get_hour_id_from_ts(block.timestamp) + block.coin_price_usd = coin_price_hours[block_hour_id] + + elif coin_price_type == CoinPriceType.daily: + block_day_ids = list(set([get_day_id_from_ts(block.timestamp) for block in blocks])) + block_days_ts = {day_id: get_ts_from_day_id(day_id) for day_id in block_day_ids} + coin_price_days = { + day_id: get_coin_price(from_currency_code=from_currency_code, timestamp=day_ts, resource="histoday") + for day_id, day_ts in block_days_ts.items() + } + + for block in blocks: + block_day_id = get_day_id_from_ts(block.timestamp) + block.coin_price_usd = coin_price_days[block_day_id] + + def _add_coin_price_to_transaction(self, transaction, coin_price_usd): + transaction.coin_price_usd = coin_price_usd + ADDRESS_TYPE_SHIELDED = 'shielded' diff --git a/blockchainetl/cryptocompare.py b/blockchainetl/cryptocompare.py new file mode 100644 index 0000000..ffd835d --- /dev/null +++ b/blockchainetl/cryptocompare.py @@ -0,0 +1,125 @@ +# MIT License +# +# Copyright (c) 2019 Nirmal AK, nirmal@merklescience.com +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import os +import requests +from time import time +from math import floor +from datetime import datetime, timedelta + + +CRYPTOCOMPARE_API_KEY = os.getenv("CRYPTOCOMPARE_API_KEY", "") + + +class CryptoCompareRequestException(Exception): + pass + + +def get_hour_id_from_ts(timestamp: int) -> int: + """ + returns the number of hours elapsed since 1st Jan 2000 + """ + base_ts = datetime(2000, 1, 1).timestamp() + seconds_to_hour = 60 * 60 + return floor((int(timestamp) - base_ts) / seconds_to_hour) + + +def get_day_id_from_ts(timestamp: int) -> int: + """ + returns the number of days elapsed since 1st Jan 2000 + """ + base_ts = datetime(2000, 1, 1).timestamp() + seconds_to_day = 60 * 60 * 24 + return floor((int(timestamp) - base_ts) / seconds_to_day) + + +def get_ts_from_hour_id(hour_id: int) -> int: + base_date = datetime(2000, 1, 1) + reference_date = base_date + timedelta(hours=hour_id) + return floor(reference_date.timestamp()) + + +def get_ts_from_day_id(day_id: int) -> int: + base_date = datetime(2000, 1, 1) + reference_date = base_date + timedelta(days=day_id) + return floor(reference_date.timestamp()) + + +def _make_request( + resource: str, + from_currency_code: str, + to_currency_code: str, + timestamp: int, + access_token: str, + exchange_code: str, + num_records: int, + api_version: str + ) -> requests.Response: + """ + API documentation for cryptocompare can be found at https://min-api.cryptocompare.com/documentation + """ + base_url = f"https://min-api.cryptocompare.com/data/{api_version}/{resource}" + params = { + "fsym": from_currency_code, + "tsym": to_currency_code, + "e": exchange_code, + "limit": num_records, + "toTs": timestamp, + "api_key": access_token + } + return requests.get(base_url, params=params) + + +def get_coin_price( + from_currency_code: str, + timestamp: int, + resource="histohour", + to_currency_code: str="USD", + exchange_code: str="CCCAGG", + num_records: int=1, + api_version: str ="v2", + access_token: str=CRYPTOCOMPARE_API_KEY, + ): + """ + Prices are retrieved from hourly price resource as prices + are available for historical data from when available + """ + response = _make_request( + resource=resource, + from_currency_code=from_currency_code, + to_currency_code=to_currency_code, + timestamp=int(timestamp), + access_token=access_token, + exchange_code=exchange_code, + num_records=num_records, + api_version=api_version, + ) + if not response.status_code == 200: + raise CryptoCompareRequestException + + payload = response.json() + if payload["Type"] != 100: + raise CryptoCompareRequestException(payload.get("Message", "")) + + data = payload["Data"]["Data"] + avg_price = sum(item["open"] for item in data) / len(data) + return round(avg_price, 2) From 269926f225fecf85d410d92305ce47cd4e45a4bb Mon Sep 17 00:00:00 2001 From: Nirmal Date: Wed, 30 Oct 2019 10:36:38 +0530 Subject: [PATCH 02/51] remove ide files --- .gitignore | 2 ++ .idea/.gitignore | 3 --- .idea/bitcoin-etl.iml | 9 --------- .idea/codeStyles/codeStyleConfig.xml | 5 ----- .idea/misc.xml | 4 ---- .idea/modules.xml | 8 -------- .idea/vcs.xml | 6 ------ 7 files changed, 2 insertions(+), 35 deletions(-) delete mode 100644 .idea/.gitignore delete mode 100644 .idea/bitcoin-etl.iml delete mode 100644 .idea/codeStyles/codeStyleConfig.xml delete mode 100644 .idea/misc.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/vcs.xml diff --git a/.gitignore b/.gitignore index 77a4b16..ed2a59a 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,5 @@ coverage.xml .venv venv/ ENV/ + +.idea/ diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index 0e40fe8..0000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ - -# Default ignored files -/workspace.xml \ No newline at end of file diff --git a/.idea/bitcoin-etl.iml b/.idea/bitcoin-etl.iml deleted file mode 100644 index d6ebd48..0000000 --- a/.idea/bitcoin-etl.iml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/.idea/codeStyles/codeStyleConfig.xml b/.idea/codeStyles/codeStyleConfig.xml deleted file mode 100644 index a55e7a1..0000000 --- a/.idea/codeStyles/codeStyleConfig.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index e588603..0000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 0b204cf..0000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 35eb1dd..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file From 8bafe1061c3e333adac310d4e82db3270d9c4650 Mon Sep 17 00:00:00 2001 From: Nirmal Date: Wed, 30 Oct 2019 21:24:57 +0530 Subject: [PATCH 03/51] add new fields to blocks and transactions export_blocks job --- bitcoinetl/domain/block.py | 18 ++++++ bitcoinetl/domain/transaction.py | 8 +++ bitcoinetl/domain/transaction_input.py | 9 ++- bitcoinetl/domain/transaction_output.py | 4 ++ .../blocks_and_transactions_item_exporter.py | 56 ++++++++++++------- bitcoinetl/mappers/block_mapper.py | 18 +++++- .../mappers/transaction_input_mapper.py | 32 +++++++---- bitcoinetl/mappers/transaction_mapper.py | 32 ++++++++++- .../mappers/transaction_output_mapper.py | 22 ++++++-- bitcoinetl/service/btc_service.py | 22 +++----- 10 files changed, 165 insertions(+), 56 deletions(-) diff --git a/bitcoinetl/domain/block.py b/bitcoinetl/domain/block.py index 87019ed..5e5ee7d 100644 --- a/bitcoinetl/domain/block.py +++ b/bitcoinetl/domain/block.py @@ -24,6 +24,7 @@ class BtcBlock(object): + def __init__(self): self.hash = None self.size = None @@ -36,9 +37,26 @@ def __init__(self): self.nonce = None self.bits = None self.coinbase_param = None + self.transaction_count = None self.transactions = [] + + # New fields added + self.transaction_ids = [] + + self.version_hex = None + self.median_timestamp = None + self.difficulty = None + self.chain_work = None + self.previous_block_hash = None + self.next_block_hash = None + self.input_value = None + + self.block_reward = None + self.transaction_fees = None self.coin_price_usd = None + self.coinbase_txid = None + self.coinbase_param_decoded = None def has_full_transactions(self): return len(self.transactions) > 0 and isinstance(self.transactions[0], BtcTransaction) diff --git a/bitcoinetl/domain/transaction.py b/bitcoinetl/domain/transaction.py index 9ddb693..2455457 100644 --- a/bitcoinetl/domain/transaction.py +++ b/bitcoinetl/domain/transaction.py @@ -44,7 +44,15 @@ def __init__(self): self.value_balance = 0 # New fields + self.transaction_id = None + self.weight = None + self.input_count = None + self.input_value = None + self.output_count = None + self.output_value = None self.coin_price_usd = None + self.output_addresses = [] + self.input_addresses = [] def add_input(self, input): if len(self.inputs) > 0: diff --git a/bitcoinetl/domain/transaction_input.py b/bitcoinetl/domain/transaction_input.py index 95a3bf6..d2a39d8 100644 --- a/bitcoinetl/domain/transaction_input.py +++ b/bitcoinetl/domain/transaction_input.py @@ -23,9 +23,12 @@ class BtcTransactionInput(object): def __init__(self): + self.create_transaction_id = None + self.create_output_index = None + + self.spending_transaction_id = None self.index = None - self.spent_transaction_hash = None - self.spent_output_index = None + self.script_asm = None self.script_hex = None self.coinbase_param = None @@ -37,4 +40,4 @@ def __init__(self): self.value = None def is_coinbase(self): - return self.coinbase_param is not None or self.spent_transaction_hash is None + return self.coinbase_param is not None or self.create_transaction_id is None diff --git a/bitcoinetl/domain/transaction_output.py b/bitcoinetl/domain/transaction_output.py index 2d3b2ca..f5c53bf 100644 --- a/bitcoinetl/domain/transaction_output.py +++ b/bitcoinetl/domain/transaction_output.py @@ -31,3 +31,7 @@ def __init__(self): self.addresses = [] self.value = None + self.witness = [] + + self.create_transaction_id = None + self.spending_transaction_id = None diff --git a/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py b/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py index 08b252d..a6bbd58 100644 --- a/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py +++ b/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py @@ -23,42 +23,58 @@ from blockchainetl.jobs.exporters.composite_item_exporter import CompositeItemExporter + BLOCK_FIELDS_TO_EXPORT = [ - 'hash', - 'size', - 'stripped_size', - 'weight', - 'number', - 'version', - 'merkle_root', - 'timestamp', - 'nonce', - 'bits', - 'coinbase_param', - 'transaction_count', + "hash", + "number", + "timestamp", + "median_timestamp", + "merkle_root", + "coinbase_param", + "coinbase_param_decoded", + "coinbase_txid", + "previous_block_hash", + "next_block_hash", + "nonce", + "difficulty", + "chain_work", + "version", + "version_hex", + "size", + "stripped_size", + "weight", + "bits", + "transaction_count", + "transaction_fees", + "block_reward", + "input_value", + "transaction_ids", "coin_price_usd", ] + TRANSACTION_FIELDS_TO_EXPORT = [ + 'transaction_id', 'hash', - 'size', - 'virtual_size', - 'version', - 'lock_time', 'block_number', 'block_hash', 'block_timestamp', 'is_coinbase', + 'lock_time', + 'size', + 'virtual_size', + 'weight', + 'version', 'index', - - 'inputs', - 'outputs', - 'input_count', 'output_count', 'input_value', 'output_value', 'fee', + 'input_addresses', + 'output_addresses', + 'inputs', + 'outputs', 'coin_price_usd', ] diff --git a/bitcoinetl/mappers/block_mapper.py b/bitcoinetl/mappers/block_mapper.py index fc0b8d1..ddf1fce 100644 --- a/bitcoinetl/mappers/block_mapper.py +++ b/bitcoinetl/mappers/block_mapper.py @@ -58,6 +58,14 @@ def json_dict_to_block(self, json_dict): block.transaction_count = len(raw_transactions) + # New fields + block.transaction_count = json_dict.get("nTx") + block.version_hex = json_dict.get("versionHex") + block.median_timestamp = json_dict.get("mediantime") + block.difficulty = int(json_dict.get("difficulty")) + block.chain_work = json_dict.get("chainwork") + block.coinbase_txid = json_dict.get("coinbase_txid") + block.previous_block_hash = json_dict.get("previousblockhash") block.coin_price_usd = json_dict.get('coin_price_usd') return block @@ -75,7 +83,15 @@ def block_to_dict(self, block): 'nonce': block.nonce, 'bits': block.bits, 'coinbase_param': block.coinbase_param, - 'transaction_count': len(block.transactions), + 'coinbase_param_decoded': block.coinbase_param_decoded, + 'coinbase_txid': block.coinbase_txid, + 'transaction_count': block.transaction_count, + 'block_reward': block.block_reward, + 'version_hex': block.version_hex, + 'median_timestamp': block.median_timestamp, + 'difficulty': block.difficulty, + 'chain_work': block.chain_work, + 'previous_block_hash': block.previous_block_hash, "coin_price_usd": block.coin_price_usd, } diff --git a/bitcoinetl/mappers/transaction_input_mapper.py b/bitcoinetl/mappers/transaction_input_mapper.py index 9d58058..468ed04 100644 --- a/bitcoinetl/mappers/transaction_input_mapper.py +++ b/bitcoinetl/mappers/transaction_input_mapper.py @@ -25,24 +25,28 @@ class BtcTransactionInputMapper(object): - def vin_to_inputs(self, vin): + def vin_to_inputs(self, vin, spending_transaction_id=None): inputs = [] index = 0 for item in (vin or []): - input = self.json_dict_to_input(item) + input = self.json_dict_to_input(json_dict=item, spending_transaction_id=spending_transaction_id) input.index = index index = index + 1 inputs.append(input) return inputs - def json_dict_to_input(self, json_dict): + def json_dict_to_input(self, json_dict, spending_transaction_id=None): input = BtcTransactionInput() - input.spent_transaction_hash = json_dict.get('txid') - input.spent_output_index = json_dict.get('vout') + input.create_transaction_id = json_dict.get('txid') + input.create_output_index = json_dict.get('vout') + + input.spending_transaction_id = spending_transaction_id + input.coinbase_param = json_dict.get('coinbase') input.sequence = json_dict.get('sequence') + if 'scriptSig' in json_dict: input.script_asm = (json_dict.get('scriptSig')).get('asm') input.script_hex = (json_dict.get('scriptSig')).get('hex') @@ -54,16 +58,21 @@ def inputs_to_dicts(self, inputs): for input in inputs: item = { 'index': input.index, - 'spent_transaction_hash': input.spent_transaction_hash, - 'spent_output_index': input.spent_output_index, + 'create_transaction_id': input.create_transaction_id, + 'spending_transaction_id': input.spending_transaction_id, + 'create_output_index': input.create_output_index, + 'sequence': input.sequence, + 'script_asm': input.script_asm, 'script_hex': input.script_hex, - 'sequence': input.sequence, + 'required_signatures': input.required_signatures, - 'type': input.type, 'addresses': input.addresses, 'value': input.value, + 'type': input.type, } + if input.coinbase_param: + item['coinbase_param'] = input.coinbase_param result.append(item) return result @@ -72,8 +81,8 @@ def dicts_to_inputs(self, dicts): for dict in dicts: input = BtcTransactionInput() input.index = dict.get('index') - input.spent_transaction_hash = dict.get('spent_transaction_hash') - input.spent_output_index = dict.get('spent_output_index') + input.create_transaction_id = dict.get('create_transaction_id') + input.create_output_index = dict.get('create_output_index') input.script_asm = dict.get('script_asm') input.script_hex = dict.get('script_hex') input.sequence = dict.get('sequence') @@ -81,6 +90,7 @@ def dicts_to_inputs(self, dicts): input.type = dict.get('type') input.addresses = dict.get('addresses') input.value = dict.get('value') + input.spending_transaction_id = dict.get('spending_transaction_id') result.append(input) return result diff --git a/bitcoinetl/mappers/transaction_mapper.py b/bitcoinetl/mappers/transaction_mapper.py index 511b0cb..cfffad6 100644 --- a/bitcoinetl/mappers/transaction_mapper.py +++ b/bitcoinetl/mappers/transaction_mapper.py @@ -36,13 +36,14 @@ def __init__(self): self.transaction_output_mapper = BtcTransactionOutputMapper() self.join_split_mapper = BtcJoinSplitMapper() - def json_dict_to_transaction(self, json_dict, block=None, index=None): + def json_dict_to_transaction(self, json_dict, block=None, index=None, coin_price_usd=None): transaction = BtcTransaction() transaction.hash = json_dict.get('txid') transaction.size = json_dict.get('size') transaction.virtual_size = json_dict.get('vsize') transaction.version = json_dict.get('version') transaction.lock_time = json_dict.get('locktime') + transaction.transaction_id = json_dict.get('txid') if block is not None: transaction.block_number = block.number @@ -58,19 +59,33 @@ def json_dict_to_transaction(self, json_dict, block=None, index=None): if index is not None: transaction.index = index - transaction.inputs = self.transaction_input_mapper.vin_to_inputs(json_dict.get('vin')) - transaction.outputs = self.transaction_output_mapper.vout_to_outputs(json_dict.get('vout')) + transaction.inputs = self.transaction_input_mapper.vin_to_inputs( + vin=json_dict.get('vin'), + spending_transaction_id=transaction.transaction_id + ) + transaction.outputs = self.transaction_output_mapper.vout_to_outputs( + vout=json_dict.get('vout'), + create_transaction_id=transaction.transaction_id + ) # Only Zcash transaction.join_splits = self.join_split_mapper.vjoinsplit_to_join_splits(json_dict.get('vjoinsplit')) transaction.value_balance = bitcoin_to_satoshi(json_dict.get('valueBalance')) + # New fields + transaction.coin_price_usd = coin_price_usd + transaction.weight = json_dict.get('weight') + transaction.output_addresses = self.get_output_addresses(transaction) return transaction + def get_output_addresses(self, transaction): + return [','.join(output.addresses) if output.addresses else output.addresses for output in transaction.outputs] + def transaction_to_dict(self, transaction): result = { 'type': 'transaction', 'hash': transaction.hash, + 'transaction_id': transaction.transaction_id, 'size': transaction.size, 'virtual_size': transaction.virtual_size, 'version': transaction.version, @@ -90,12 +105,15 @@ def transaction_to_dict(self, transaction): 'output_value': transaction.calculate_output_value(), 'fee': transaction.calculate_fee(), 'coin_price_usd': transaction.coin_price_usd, + 'weight': transaction.weight, + 'output_addresses': transaction.output_addresses } return result def dict_to_transaction(self, dict): transaction = BtcTransaction() transaction.hash = dict.get('hash') + transaction.transaction_id = dict.get('transaction_id') transaction.size = dict.get('size') transaction.virtual_size = dict.get('virtual_size') transaction.version = dict.get('version') @@ -106,6 +124,14 @@ def dict_to_transaction(self, dict): transaction.is_coinbase = dict.get('is_coinbase') transaction.index = dict.get('index') transaction.coin_price_usd = dict.get('coin_price_usd') + transaction.weight = dict.get('weight') + transaction.output_addresses = dict.get('output_addresses') + transaction.input_addresses = dict.get('input_addresses') + transaction.input_count = dict.get('input_count') + transaction.input_value = dict.get('input_value') + transaction.output_count = dict.get('output_count') + transaction.output_value = dict.get('output_value') + transaction.fee = dict.get('fee') transaction.inputs = self.transaction_input_mapper.dicts_to_inputs(dict.get('inputs')) transaction.outputs = self.transaction_output_mapper.dicts_to_outputs(dict.get('outputs')) diff --git a/bitcoinetl/mappers/transaction_output_mapper.py b/bitcoinetl/mappers/transaction_output_mapper.py index 6974006..1466db5 100644 --- a/bitcoinetl/mappers/transaction_output_mapper.py +++ b/bitcoinetl/mappers/transaction_output_mapper.py @@ -26,20 +26,22 @@ class BtcTransactionOutputMapper(object): - def vout_to_outputs(self, vout): + def vout_to_outputs(self, vout, create_transaction_id=None): outputs = [] for item in (vout or []): - output = self.json_dict_to_output(item) + output = self.json_dict_to_output(json_dict=item, create_transaction_id=create_transaction_id) outputs.append(output) return outputs - def json_dict_to_output(self, json_dict): + def json_dict_to_output(self, json_dict, create_transaction_id=None): output = BtcTransactionOutput() output.index = json_dict.get('n') output.addresses = json_dict.get('addresses') output.txinwitness = json_dict.get('txinwitness') output.value = bitcoin_to_satoshi(json_dict.get('value')) + output.create_transaction_id = create_transaction_id + if 'scriptPubKey' in json_dict: script_pub_key = json_dict.get('scriptPubKey') output.script_asm = script_pub_key.get('asm') @@ -55,13 +57,20 @@ def outputs_to_dicts(self, outputs): for output in outputs: item = { 'index': output.index, + 'create_transaction_id': output.create_transaction_id, + 'spending_transaction_id': None, + 'script_asm': output.script_asm, 'script_hex': output.script_hex, - 'required_signatures': output.required_signatures, + 'type': output.type, 'addresses': output.addresses, - 'value': output.value + 'value': output.value, + 'required_signatures': output.required_signatures, } + if output.txinwitness: + item['witness'] = output.txinwitness + result.append(item) return result @@ -76,6 +85,9 @@ def dicts_to_outputs(self, dicts): input.type = dict.get('type') input.addresses = dict.get('addresses') input.value = dict.get('value') + input.witness = dict.get('witness') + input.create_transaction_id = dict.get('create_transaction_id') + input.spending_transaction_id = dict.get('spending_transaction_id') result.append(input) return result diff --git a/bitcoinetl/service/btc_service.py b/bitcoinetl/service/btc_service.py index 83ea034..0f71b94 100644 --- a/bitcoinetl/service/btc_service.py +++ b/bitcoinetl/service/btc_service.py @@ -156,6 +156,7 @@ def _remove_coinbase_input(self, block): if block.has_full_transactions(): for transaction in block.transactions: coinbase_inputs = [input for input in transaction.inputs if input.is_coinbase()] + if len(coinbase_inputs) > 1: raise ValueError('There must be no more than 1 coinbase input in any transaction. Was {}, hash {}' .format(len(coinbase_inputs), transaction.hash)) @@ -165,6 +166,14 @@ def _remove_coinbase_input(self, block): transaction.inputs = [input for input in transaction.inputs if not input.is_coinbase()] transaction.is_coinbase = True + block.coinbase_param = coinbase_input.coinbase_param + block.coinbase_param_decoded = bytes.fromhex(coinbase_input.coinbase_param).decode('utf-8', 'replace') + block.coinbase_tx = transaction + block.coinbase_txid = transaction.transaction_id + + block.block_reward = self.get_block_reward(block) + transaction.input_count = 0 + def _add_non_standard_addresses(self, transaction): for output in transaction.outputs: if output.addresses is None or len(output.addresses) == 0: @@ -198,22 +207,9 @@ def _add_shielded_inputs_and_outputs(self, transaction): output.value = -transaction.value_balance transaction.add_output(output) - def non_coinbase_txs(self, block): - return [transaction - for transaction in block.transactions - if not transaction.transaction_id != block.coinbase_tx - ] - - def get_transaction_ids(self, block): - return [tx.transaction_id for tx in block.transactions] - def get_block_reward(self, block): return block.coinbase_tx.calculate_output_value() - def get_input_value(self, block): - non_coinbase_txs = self.non_coinbase_txs(block) - return sum([tx.calculate_input_value() for tx in non_coinbase_txs]) - def _add_coin_price_to_blocks(self, blocks, coin_price_type): from_currency_code = Chain.ticker_symbol(self.chain) From 6ffd22662119c8cf8d89e70b43b1399bbad29131 Mon Sep 17 00:00:00 2001 From: Nirmal Date: Wed, 30 Oct 2019 21:52:37 +0530 Subject: [PATCH 04/51] update txn mapper, adds transaction_id --- bitcoinetl/mappers/transaction_mapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitcoinetl/mappers/transaction_mapper.py b/bitcoinetl/mappers/transaction_mapper.py index cfffad6..c1094b6 100644 --- a/bitcoinetl/mappers/transaction_mapper.py +++ b/bitcoinetl/mappers/transaction_mapper.py @@ -38,7 +38,7 @@ def __init__(self): def json_dict_to_transaction(self, json_dict, block=None, index=None, coin_price_usd=None): transaction = BtcTransaction() - transaction.hash = json_dict.get('txid') + transaction.hash = json_dict.get('hash') transaction.size = json_dict.get('size') transaction.virtual_size = json_dict.get('vsize') transaction.version = json_dict.get('version') From 24c6693a7d3875cf1450a447bd0ecf128af2e91e Mon Sep 17 00:00:00 2001 From: Nirmal Date: Mon, 4 Nov 2019 13:14:24 +0530 Subject: [PATCH 05/51] syncs legacy export schema with current schema --- bitcoinetl/domain/transaction.py | 2 -- .../jobs/exporters/blocks_and_transactions_item_exporter.py | 3 --- bitcoinetl/mappers/block_mapper.py | 2 ++ 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/bitcoinetl/domain/transaction.py b/bitcoinetl/domain/transaction.py index 2455457..de66cad 100644 --- a/bitcoinetl/domain/transaction.py +++ b/bitcoinetl/domain/transaction.py @@ -51,8 +51,6 @@ def __init__(self): self.output_count = None self.output_value = None self.coin_price_usd = None - self.output_addresses = [] - self.input_addresses = [] def add_input(self, input): if len(self.inputs) > 0: diff --git a/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py b/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py index a6bbd58..764a5ad 100644 --- a/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py +++ b/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py @@ -70,9 +70,6 @@ 'output_count', 'input_value', 'output_value', - 'fee', - 'input_addresses', - 'output_addresses', 'inputs', 'outputs', 'coin_price_usd', diff --git a/bitcoinetl/mappers/block_mapper.py b/bitcoinetl/mappers/block_mapper.py index ddf1fce..806d391 100644 --- a/bitcoinetl/mappers/block_mapper.py +++ b/bitcoinetl/mappers/block_mapper.py @@ -67,6 +67,7 @@ def json_dict_to_block(self, json_dict): block.coinbase_txid = json_dict.get("coinbase_txid") block.previous_block_hash = json_dict.get("previousblockhash") block.coin_price_usd = json_dict.get('coin_price_usd') + block.transaction_ids = [tx.transaction_id for tx in block.transactions] return block def block_to_dict(self, block): @@ -93,6 +94,7 @@ def block_to_dict(self, block): 'chain_work': block.chain_work, 'previous_block_hash': block.previous_block_hash, "coin_price_usd": block.coin_price_usd, + "transaction_ids": block.transaction_ids } From 2c6367f633dfb60a3b335133f5666e5cd0d32dde Mon Sep 17 00:00:00 2001 From: Nirmal Date: Mon, 11 Nov 2019 10:55:22 +0530 Subject: [PATCH 06/51] removing unused fields --- .../jobs/exporters/blocks_and_transactions_item_exporter.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py b/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py index 764a5ad..243d20b 100644 --- a/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py +++ b/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py @@ -34,7 +34,6 @@ "coinbase_param_decoded", "coinbase_txid", "previous_block_hash", - "next_block_hash", "nonce", "difficulty", "chain_work", @@ -45,9 +44,7 @@ "weight", "bits", "transaction_count", - "transaction_fees", "block_reward", - "input_value", "transaction_ids", "coin_price_usd", ] From 03dbcadf8a410df8ea6634ee359ce41401eac319 Mon Sep 17 00:00:00 2001 From: Nirmal Date: Tue, 12 Nov 2019 16:17:21 +0530 Subject: [PATCH 07/51] fix for tranasction ids --- bitcoinetl/mappers/block_mapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitcoinetl/mappers/block_mapper.py b/bitcoinetl/mappers/block_mapper.py index 806d391..7a24dd3 100644 --- a/bitcoinetl/mappers/block_mapper.py +++ b/bitcoinetl/mappers/block_mapper.py @@ -52,6 +52,7 @@ def json_dict_to_block(self, json_dict): block.transactions = [ self.transaction_mapper.json_dict_to_transaction(tx, block, idx) for idx, tx in enumerate(raw_transactions) ] + block.transaction_ids = [tx.transaction_id for tx in block.transactions] else: # Transaction hashes block.transactions = raw_transactions @@ -67,7 +68,6 @@ def json_dict_to_block(self, json_dict): block.coinbase_txid = json_dict.get("coinbase_txid") block.previous_block_hash = json_dict.get("previousblockhash") block.coin_price_usd = json_dict.get('coin_price_usd') - block.transaction_ids = [tx.transaction_id for tx in block.transactions] return block def block_to_dict(self, block): From f4b08024a48ee0b7595bfeadc162ded8074d7a0b Mon Sep 17 00:00:00 2001 From: Nirmal Date: Tue, 12 Nov 2019 16:19:48 +0530 Subject: [PATCH 08/51] handle case of lower verbosity --- bitcoinetl/mappers/block_mapper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bitcoinetl/mappers/block_mapper.py b/bitcoinetl/mappers/block_mapper.py index 7a24dd3..d72e7f5 100644 --- a/bitcoinetl/mappers/block_mapper.py +++ b/bitcoinetl/mappers/block_mapper.py @@ -56,6 +56,7 @@ def json_dict_to_block(self, json_dict): else: # Transaction hashes block.transactions = raw_transactions + block.transaction_ids = raw_transactions block.transaction_count = len(raw_transactions) From 899bea64d383f49e2d96c3aff9a3e280b78c184f Mon Sep 17 00:00:00 2001 From: Nirmal Date: Tue, 12 Nov 2019 18:28:30 +0530 Subject: [PATCH 09/51] try caching cryptocompare prices --- bitcoinetl/service/btc_service.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/bitcoinetl/service/btc_service.py b/bitcoinetl/service/btc_service.py index 0f71b94..af71bc1 100644 --- a/bitcoinetl/service/btc_service.py +++ b/bitcoinetl/service/btc_service.py @@ -46,6 +46,7 @@ def __init__(self, bitcoin_rpc, chain=Chain.BITCOIN, coin_price_type=CoinPriceTy self.transaction_mapper = BtcTransactionMapper() self.chain = chain self.coin_price_type = coin_price_type + self.cached_prices = {} def get_block(self, block_number, with_transactions=False): block_hashes = self.get_block_hashes([block_number]) @@ -219,22 +220,26 @@ def _add_coin_price_to_blocks(self, blocks, coin_price_type): elif coin_price_type == CoinPriceType.hourly: block_hour_ids = list(set([get_hour_id_from_ts(block.timestamp) for block in blocks])) block_hours_ts = {hour_id: get_ts_from_hour_id(hour_id) for hour_id in block_hour_ids} - coin_price_hours = { - hour_id: get_coin_price(from_currency_code=from_currency_code, timestamp=hour_ts, resource="histohour") - for hour_id, hour_ts in block_hours_ts.items() - } + + for hour_id, hour_ts in block_hours_ts.items(): + if hour_id in self.cached_prices: + continue + + self.cached_prices[hour_id] = get_coin_price(from_currency_code=from_currency_code, timestamp=hour_ts, resource="histohour") for block in blocks: block_hour_id = get_hour_id_from_ts(block.timestamp) - block.coin_price_usd = coin_price_hours[block_hour_id] + block.coin_price_usd = self.cached_prices[block_hour_id] elif coin_price_type == CoinPriceType.daily: block_day_ids = list(set([get_day_id_from_ts(block.timestamp) for block in blocks])) block_days_ts = {day_id: get_ts_from_day_id(day_id) for day_id in block_day_ids} - coin_price_days = { - day_id: get_coin_price(from_currency_code=from_currency_code, timestamp=day_ts, resource="histoday") - for day_id, day_ts in block_days_ts.items() - } + + for day_id, day_ts in block_days_ts.items(): + if day_id in self.cached_prices: + continue + + self.cached_prices[day_id] = get_coin_price(from_currency_code=from_currency_code, timestamp=day_ts, resource="histoday") for block in blocks: block_day_id = get_day_id_from_ts(block.timestamp) From 09f661e2a1ded4758363ec91b0037c23381701b3 Mon Sep 17 00:00:00 2001 From: Nirmal Date: Tue, 12 Nov 2019 18:33:21 +0530 Subject: [PATCH 10/51] reading from cache for daily prices --- bitcoinetl/service/btc_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitcoinetl/service/btc_service.py b/bitcoinetl/service/btc_service.py index af71bc1..c2bcc44 100644 --- a/bitcoinetl/service/btc_service.py +++ b/bitcoinetl/service/btc_service.py @@ -243,7 +243,7 @@ def _add_coin_price_to_blocks(self, blocks, coin_price_type): for block in blocks: block_day_id = get_day_id_from_ts(block.timestamp) - block.coin_price_usd = coin_price_days[block_day_id] + block.coin_price_usd = self.cached_prices[block_day_id] def _add_coin_price_to_transaction(self, transaction, coin_price_usd): transaction.coin_price_usd = coin_price_usd From e009debdb0e5677f1b5e4d44a71bdb8743ea84b3 Mon Sep 17 00:00:00 2001 From: Nirmal Date: Wed, 13 Nov 2019 16:07:16 +0530 Subject: [PATCH 11/51] increase tiemout - bitcoincash failures --- bitcoinetl/rpc/bitcoin_rpc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitcoinetl/rpc/bitcoin_rpc.py b/bitcoinetl/rpc/bitcoin_rpc.py index 498b64f..a9828c4 100644 --- a/bitcoinetl/rpc/bitcoin_rpc.py +++ b/bitcoinetl/rpc/bitcoin_rpc.py @@ -28,7 +28,7 @@ class BitcoinRpc: - def __init__(self, provider_uri, timeout=60): + def __init__(self, provider_uri, timeout=180): self.provider_uri = provider_uri self.timeout = timeout From 63aa436cf3f9c8df03b7eb286cc034a845dc70d2 Mon Sep 17 00:00:00 2001 From: Saurabh Daga Date: Mon, 9 Mar 2020 23:12:14 +0530 Subject: [PATCH 12/51] stream --- bitcoinetl/cli/stream.py | 9 +++++--- bitcoinetl/jobs/enrich_transactions.py | 22 +++++++++---------- .../mappers/transaction_output_mapper.py | 6 ++--- bitcoinetl/streaming/btc_streamer_adapter.py | 12 +++++++--- blockchainetl/cryptocompare.py | 2 +- dockerhub.md | 4 ++-- 6 files changed, 32 insertions(+), 23 deletions(-) diff --git a/bitcoinetl/cli/stream.py b/bitcoinetl/cli/stream.py index 46170df..59d5d07 100644 --- a/bitcoinetl/cli/stream.py +++ b/bitcoinetl/cli/stream.py @@ -22,7 +22,7 @@ import click -from bitcoinetl.enumeration.chain import Chain +from bitcoinetl.enumeration.chain import Chain, CoinPriceType from bitcoinetl.rpc.bitcoin_rpc import BitcoinRpc from blockchainetl.logging_utils import logging_basic_config @@ -50,9 +50,11 @@ @click.option('--log-file', default=None, type=str, help='Log file.') @click.option('--pid-file', default=None, type=str, help='pid file.') @click.option('--enrich', default=True, type=bool, help='Enable filling in transactions inputs fields.') +@click.option('--coin-price-type', default=CoinPriceType.hourly, type=int, + help='Enable querying CryptoCompare for coin prices. 0 for no price, 1 for daily price, 2 for hourly price.') def stream(last_synced_block_file, lag, provider_uri, output, start_block, chain=Chain.BITCOIN, period_seconds=10, batch_size=2, block_batch_size=10, max_workers=5, log_file=None, pid_file=None, - enrich=True): + enrich=True, coin_price_type=CoinPriceType.hourly): """Streams all data types to console or Google Pub/Sub.""" configure_logging(log_file) configure_signals() @@ -67,7 +69,8 @@ def stream(last_synced_block_file, lag, provider_uri, output, start_block, chain chain=chain, batch_size=batch_size, enable_enrich=enrich, - max_workers=max_workers + max_workers=max_workers, + coin_price_type=coin_price_type, ) streamer = Streamer( blockchain_streamer_adapter=streamer_adapter, diff --git a/bitcoinetl/jobs/enrich_transactions.py b/bitcoinetl/jobs/enrich_transactions.py index a100f84..dec7dd7 100644 --- a/bitcoinetl/jobs/enrich_transactions.py +++ b/bitcoinetl/jobs/enrich_transactions.py @@ -63,7 +63,7 @@ def _enrich_transactions(self, transactions): input_transactions_map = self._get_input_transactions_as_map(transaction_input_batch) for input in transaction_input_batch: output = self._get_output_for_input(input, input_transactions_map) \ - if input.spent_transaction_hash is not None else None + if input.create_transaction_id is not None else None if output is not None: input.required_signatures = output.required_signatures input.type = output.type @@ -74,29 +74,29 @@ def _enrich_transactions(self, transactions): self.item_exporter.export_item(self.transaction_mapper.transaction_to_dict(transaction)) def _get_input_transactions_as_map(self, transaction_inputs): - transaction_hashes = [input.spent_transaction_hash for input in transaction_inputs - if input.spent_transaction_hash is not None] + transaction_hashes = [input.create_transaction_id for input in transaction_inputs + if input.create_transaction_id is not None] transaction_hashes = set(transaction_hashes) if len(transaction_hashes) > 0: transactions = self.btc_service.get_transactions_by_hashes(transaction_hashes) - return {transaction.hash: transaction for transaction in transactions} + return {transaction.transaction_id: transaction for transaction in transactions} else: return {} def _get_output_for_input(self, transaction_input, input_transactions_map): - spent_transaction_hash = transaction_input.spent_transaction_hash - input_transaction = input_transactions_map.get(spent_transaction_hash) + create_transaction_id = transaction_input.create_transaction_id + input_transaction = input_transactions_map.get(create_transaction_id) if input_transaction is None: - raise ValueError('Input transaction with hash {} not found'.format(spent_transaction_hash)) + raise ValueError('Input transaction with hash {} not found'.format(create_transaction_id)) - spent_output_index = transaction_input.spent_output_index - if input_transaction.outputs is None or len(input_transaction.outputs) < (spent_output_index + 1): + create_output_index = transaction_input.create_output_index + if input_transaction.outputs is None or len(input_transaction.outputs) < (create_output_index + 1): raise ValueError( 'There is no output with index {} in transaction with hash {}'.format( - spent_output_index, spent_transaction_hash)) + create_output_index, create_transaction_id)) - output = input_transaction.outputs[spent_output_index] + output = input_transaction.outputs[create_output_index] return output def _end(self): diff --git a/bitcoinetl/mappers/transaction_output_mapper.py b/bitcoinetl/mappers/transaction_output_mapper.py index 1466db5..3315917 100644 --- a/bitcoinetl/mappers/transaction_output_mapper.py +++ b/bitcoinetl/mappers/transaction_output_mapper.py @@ -38,7 +38,7 @@ def json_dict_to_output(self, json_dict, create_transaction_id=None): output.index = json_dict.get('n') output.addresses = json_dict.get('addresses') - output.txinwitness = json_dict.get('txinwitness') + output.witness = json_dict.get('txinwitness') output.value = bitcoin_to_satoshi(json_dict.get('value')) output.create_transaction_id = create_transaction_id @@ -68,8 +68,8 @@ def outputs_to_dicts(self, outputs): 'value': output.value, 'required_signatures': output.required_signatures, } - if output.txinwitness: - item['witness'] = output.txinwitness + if output.witness: + item['witness'] = output.witness result.append(item) return result diff --git a/bitcoinetl/streaming/btc_streamer_adapter.py b/bitcoinetl/streaming/btc_streamer_adapter.py index 3b35847..1ef0ea2 100644 --- a/bitcoinetl/streaming/btc_streamer_adapter.py +++ b/bitcoinetl/streaming/btc_streamer_adapter.py @@ -23,7 +23,7 @@ import logging -from bitcoinetl.enumeration.chain import Chain +from bitcoinetl.enumeration.chain import Chain, CoinPriceType from bitcoinetl.jobs.enrich_transactions import EnrichTransactionsJob from bitcoinetl.jobs.export_blocks_job import ExportBlocksJob from bitcoinetl.service.btc_service import BtcService @@ -39,7 +39,8 @@ def __init__( chain=Chain.BITCOIN, batch_size=2, enable_enrich=True, - max_workers=5): + max_workers=5, + coin_price_type=CoinPriceType.empty): self.bitcoin_rpc = bitcoin_rpc self.chain = chain self.btc_service = BtcService(bitcoin_rpc, chain) @@ -47,6 +48,7 @@ def __init__( self.batch_size = batch_size self.enable_enrich = enable_enrich self.max_workers = max_workers + self.coin_price_type = coin_price_type def open(self): self.item_exporter.open() @@ -67,7 +69,8 @@ def export_all(self, start_block, end_block): item_exporter=blocks_and_transactions_item_exporter, chain=self.chain, export_blocks=True, - export_transactions=True + export_transactions=True, + coin_price_type=self.coin_price_type ) blocks_and_transactions_job.run() @@ -93,6 +96,9 @@ def export_all(self, start_block, end_block): transactions = enriched_transactions logging.info('Exporting with ' + type(self.item_exporter).__name__) + logging.info('Block number ' + str(len(blocks))) + logging.info('Blocks ' + str(blocks)) + logging.info('Transaction length ' + str(len(transactions))) self.item_exporter.export_items(blocks + transactions) def close(self): diff --git a/blockchainetl/cryptocompare.py b/blockchainetl/cryptocompare.py index ffd835d..61563b5 100644 --- a/blockchainetl/cryptocompare.py +++ b/blockchainetl/cryptocompare.py @@ -27,7 +27,7 @@ from datetime import datetime, timedelta -CRYPTOCOMPARE_API_KEY = os.getenv("CRYPTOCOMPARE_API_KEY", "") +CRYPTOCOMPARE_API_KEY = os.getenv("CRYPTOCOMPARE_API_KEY", "45440cef8b259c4891f26c696936a668c4304e48f19ed1f207eb142ca905c20a") class CryptoCompareRequestException(Exception): diff --git a/dockerhub.md b/dockerhub.md index 37e835c..974f861 100644 --- a/dockerhub.md +++ b/dockerhub.md @@ -3,8 +3,8 @@ ```bash > BITCOINETL_STREAMING_VERSION=1.3.0-streaming > docker build -t bitcoin-etl:${BITCOINETL_STREAMING_VERSION} -f Dockerfile_with_streaming . -> docker tag bitcoin-etl:${BITCOINETL_STREAMING_VERSION} blockchainetl/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} -> docker push blockchainetl/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} +> docker tag bitcoin-etl:${BITCOINETL_STREAMING_VERSION} merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} +> docker push merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} > docker tag bitcoin-etl:${BITCOINETL_STREAMING_VERSION} blockchainetl/bitcoin-etl:latest-streaming > docker push blockchainetl/bitcoin-etl:latest-streaming From a62b8a78c610a2873b819c17c34d56cdb2a0dced Mon Sep 17 00:00:00 2001 From: Saurabh Daga Date: Tue, 10 Mar 2020 00:43:19 +0530 Subject: [PATCH 13/51] countainer command --- README_CONTAINER_REGISTRY.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 README_CONTAINER_REGISTRY.md diff --git a/README_CONTAINER_REGISTRY.md b/README_CONTAINER_REGISTRY.md new file mode 100644 index 0000000..27882b1 --- /dev/null +++ b/README_CONTAINER_REGISTRY.md @@ -0,0 +1,4 @@ +BITCOINETL_STREAMING_VERSION=1.3.5-streaming + docker build -t merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} -f Dockerfile_with_streaming . + docker tag merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} us.gcr.io/staging-btc-etl/merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} + docker push us.gcr.io/staging-btc-etl/merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} From ca85867814c3d4a2230cf4a544bc9d22eb463062 Mon Sep 17 00:00:00 2001 From: saurabhdaga-merkle Date: Thu, 25 Feb 2021 13:37:54 +0530 Subject: [PATCH 14/51] fix streaming --- README_CONTAINER_REGISTRY.md | 2 +- bitcoinetl/cli/stream.py | 6 +++--- blockchainetl/cryptocompare.py | 2 +- blockchainetl/jobs/exporters/google_pubsub_item_exporter.py | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README_CONTAINER_REGISTRY.md b/README_CONTAINER_REGISTRY.md index 27882b1..8cd39d3 100644 --- a/README_CONTAINER_REGISTRY.md +++ b/README_CONTAINER_REGISTRY.md @@ -1,4 +1,4 @@ -BITCOINETL_STREAMING_VERSION=1.3.5-streaming +BITCOINETL_STREAMING_VERSION=1.3.8-streaming docker build -t merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} -f Dockerfile_with_streaming . docker tag merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} us.gcr.io/staging-btc-etl/merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} docker push us.gcr.io/staging-btc-etl/merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} diff --git a/bitcoinetl/cli/stream.py b/bitcoinetl/cli/stream.py index 59d5d07..917feac 100644 --- a/bitcoinetl/cli/stream.py +++ b/bitcoinetl/cli/stream.py @@ -43,8 +43,8 @@ 'If not specified will print to console.') @click.option('-s', '--start-block', default=None, type=int, help='Start block.') @click.option('-c', '--chain', default=Chain.BITCOIN, type=click.Choice(Chain.ALL), help='The type of chain.') -@click.option('--period-seconds', default=10, type=int, help='How many seconds to sleep between syncs.') -@click.option('-b', '--batch-size', default=2, type=int, help='How many blocks to batch in single request.') +@click.option('--period-seconds', default=1, type=int, help='How many seconds to sleep between syncs.') +@click.option('-b', '--batch-size', default=1, type=int, help='How many blocks to batch in single request.') @click.option('-B', '--block-batch-size', default=10, type=int, help='How many blocks to batch in single sync round.') @click.option('-w', '--max-workers', default=5, type=int, help='The number of workers.') @click.option('--log-file', default=None, type=str, help='Log file.') @@ -53,7 +53,7 @@ @click.option('--coin-price-type', default=CoinPriceType.hourly, type=int, help='Enable querying CryptoCompare for coin prices. 0 for no price, 1 for daily price, 2 for hourly price.') def stream(last_synced_block_file, lag, provider_uri, output, start_block, chain=Chain.BITCOIN, - period_seconds=10, batch_size=2, block_batch_size=10, max_workers=5, log_file=None, pid_file=None, + period_seconds=1, batch_size=1, block_batch_size=10, max_workers=5, log_file=None, pid_file=None, enrich=True, coin_price_type=CoinPriceType.hourly): """Streams all data types to console or Google Pub/Sub.""" configure_logging(log_file) diff --git a/blockchainetl/cryptocompare.py b/blockchainetl/cryptocompare.py index 61563b5..c9d92c8 100644 --- a/blockchainetl/cryptocompare.py +++ b/blockchainetl/cryptocompare.py @@ -27,7 +27,7 @@ from datetime import datetime, timedelta -CRYPTOCOMPARE_API_KEY = os.getenv("CRYPTOCOMPARE_API_KEY", "45440cef8b259c4891f26c696936a668c4304e48f19ed1f207eb142ca905c20a") +CRYPTOCOMPARE_API_KEY = os.getenv("CRYPTOCOMPARE_API_KEY", "12e40a21ca4933874e0242bf0fde2b5d1f99304ca431dca5d541f3be03c0ef66") class CryptoCompareRequestException(Exception): diff --git a/blockchainetl/jobs/exporters/google_pubsub_item_exporter.py b/blockchainetl/jobs/exporters/google_pubsub_item_exporter.py index fba6f09..0609389 100644 --- a/blockchainetl/jobs/exporters/google_pubsub_item_exporter.py +++ b/blockchainetl/jobs/exporters/google_pubsub_item_exporter.py @@ -74,9 +74,9 @@ def close(self): def create_publisher(): batch_settings = pubsub_v1.types.BatchSettings( - max_bytes=1024 * 5, # 5 kilobytes - max_latency=1, # 1 second - max_messages=1000, + max_bytes=1024 * 10, # 5 kilobytes + max_latency=0.01, # 1 second + max_messages=10000, ) return pubsub_v1.PublisherClient(batch_settings) From e12dbbb6666402c4e4529d09cfcba07048d0c4ec Mon Sep 17 00:00:00 2001 From: saurabhdaga-merkle Date: Mon, 8 Mar 2021 08:21:40 +0530 Subject: [PATCH 15/51] add bsv --- bitcoinetl/enumeration/chain.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bitcoinetl/enumeration/chain.py b/bitcoinetl/enumeration/chain.py index f115d9b..4f164c2 100644 --- a/bitcoinetl/enumeration/chain.py +++ b/bitcoinetl/enumeration/chain.py @@ -1,6 +1,7 @@ class Chain: BITCOIN = 'bitcoin' BITCOIN_CASH = 'bitcoin_cash' + BITCOIN_SV = 'bitcoin_sv' DOGECOIN = 'dogecoin' LITECOIN = 'litecoin' DASH = 'dash' @@ -16,6 +17,7 @@ def ticker_symbol(cls, chain): symbols = { 'bitcoin': 'BTC', 'bitcoin_cash': 'BCH', + 'bitcoin_sv': 'BSV', 'dogecoin': 'DOGE', 'litecoin': 'LTC', 'dash': 'DASH', @@ -25,7 +27,7 @@ def ticker_symbol(cls, chain): return symbols.get(chain, None) -class CoinPriceType: +class CoinPriceType:git statu empty = 0 daily = 1 From 81943dea234b14fea0582032c4165413b650b346 Mon Sep 17 00:00:00 2001 From: saurabhdaga-merkle Date: Mon, 8 Mar 2021 08:23:20 +0530 Subject: [PATCH 16/51] add bsv --- bitcoinetl/enumeration/chain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitcoinetl/enumeration/chain.py b/bitcoinetl/enumeration/chain.py index 4f164c2..c5419e5 100644 --- a/bitcoinetl/enumeration/chain.py +++ b/bitcoinetl/enumeration/chain.py @@ -27,7 +27,7 @@ def ticker_symbol(cls, chain): return symbols.get(chain, None) -class CoinPriceType:git statu +class CoinPriceType: empty = 0 daily = 1 From 9c175cf29013565c5553bd652511e758eef6f4cb Mon Sep 17 00:00:00 2001 From: saurabhdaga-merkle Date: Mon, 8 Mar 2021 08:25:32 +0530 Subject: [PATCH 17/51] add bsv --- bitcoinetl/enumeration/chain.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitcoinetl/enumeration/chain.py b/bitcoinetl/enumeration/chain.py index c5419e5..fcdcf12 100644 --- a/bitcoinetl/enumeration/chain.py +++ b/bitcoinetl/enumeration/chain.py @@ -8,7 +8,7 @@ class Chain: ZCASH = 'zcash' MONACOIN = 'monacoin' - ALL = [BITCOIN, BITCOIN_CASH, DOGECOIN, LITECOIN, DASH, ZCASH, MONACOIN] + ALL = [BITCOIN, BITCOIN_CASH, BITCOIN_SV, DOGECOIN, LITECOIN, DASH, ZCASH, MONACOIN] # Old API doesn't support verbosity for getblock which doesn't allow querying all transactions in a block in 1 go. HAVE_OLD_API = [BITCOIN_CASH, DOGECOIN, DASH, MONACOIN] From fd10f2d05e90d4aa411389d75d063240ab39f110 Mon Sep 17 00:00:00 2001 From: saurabhdaga-merkle Date: Mon, 22 Mar 2021 10:22:22 +0530 Subject: [PATCH 18/51] debug api key --- blockchainetl/cryptocompare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockchainetl/cryptocompare.py b/blockchainetl/cryptocompare.py index c9d92c8..572bc4d 100644 --- a/blockchainetl/cryptocompare.py +++ b/blockchainetl/cryptocompare.py @@ -118,7 +118,7 @@ def get_coin_price( payload = response.json() if payload["Type"] != 100: - raise CryptoCompareRequestException(payload.get("Message", "")) + raise CryptoCompareRequestException(payload.get("Message", "") + access_token) data = payload["Data"]["Data"] avg_price = sum(item["open"] for item in data) / len(data) From 3e9d8f6efa58acef9e99e4e44a3af085bda31813 Mon Sep 17 00:00:00 2001 From: saurabhdaga-merkle Date: Mon, 22 Mar 2021 13:34:44 +0530 Subject: [PATCH 19/51] add BSV --- bitcoinetl/enumeration/chain.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bitcoinetl/enumeration/chain.py b/bitcoinetl/enumeration/chain.py index fcdcf12..77eab05 100644 --- a/bitcoinetl/enumeration/chain.py +++ b/bitcoinetl/enumeration/chain.py @@ -1,14 +1,14 @@ class Chain: BITCOIN = 'bitcoin' BITCOIN_CASH = 'bitcoin_cash' - BITCOIN_SV = 'bitcoin_sv' + BITCOIN_CASH_SV = 'bitcoin_cash_sv' DOGECOIN = 'dogecoin' LITECOIN = 'litecoin' DASH = 'dash' ZCASH = 'zcash' MONACOIN = 'monacoin' - ALL = [BITCOIN, BITCOIN_CASH, BITCOIN_SV, DOGECOIN, LITECOIN, DASH, ZCASH, MONACOIN] + ALL = [BITCOIN, BITCOIN_CASH, BITCOIN_CASH_SV, DOGECOIN, LITECOIN, DASH, ZCASH, MONACOIN] # Old API doesn't support verbosity for getblock which doesn't allow querying all transactions in a block in 1 go. HAVE_OLD_API = [BITCOIN_CASH, DOGECOIN, DASH, MONACOIN] @@ -17,7 +17,7 @@ def ticker_symbol(cls, chain): symbols = { 'bitcoin': 'BTC', 'bitcoin_cash': 'BCH', - 'bitcoin_sv': 'BSV', + 'bitcoin_cash_sv': 'BSV', 'dogecoin': 'DOGE', 'litecoin': 'LTC', 'dash': 'DASH', From 391b3d46f1d56e51f9530da59e3ea96bd27b62e9 Mon Sep 17 00:00:00 2001 From: saurabhdaga-merkle Date: Mon, 3 May 2021 09:53:23 +0530 Subject: [PATCH 20/51] BSV changes --- README_CONTAINER_REGISTRY.md | 3 ++- bitcoinetl/cli/stream.py | 7 ++++++- .../jobs/exporters/google_pubsub_item_exporter.py | 2 +- blockchainetl/streaming/streamer.py | 8 +++++++- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/README_CONTAINER_REGISTRY.md b/README_CONTAINER_REGISTRY.md index 8cd39d3..a5a46fc 100644 --- a/README_CONTAINER_REGISTRY.md +++ b/README_CONTAINER_REGISTRY.md @@ -1,4 +1,5 @@ -BITCOINETL_STREAMING_VERSION=1.3.8-streaming +BITCOINETL_STREAMING_VERSION=1.3.8-streaming-test docker build -t merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} -f Dockerfile_with_streaming . docker tag merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} us.gcr.io/staging-btc-etl/merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} docker push us.gcr.io/staging-btc-etl/merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} + diff --git a/bitcoinetl/cli/stream.py b/bitcoinetl/cli/stream.py index 917feac..2384984 100644 --- a/bitcoinetl/cli/stream.py +++ b/bitcoinetl/cli/stream.py @@ -50,11 +50,12 @@ @click.option('--log-file', default=None, type=str, help='Log file.') @click.option('--pid-file', default=None, type=str, help='pid file.') @click.option('--enrich', default=True, type=bool, help='Enable filling in transactions inputs fields.') +@click.option('--retry_errors', default=True, type=bool, help='Enable Retry on streaming failures') @click.option('--coin-price-type', default=CoinPriceType.hourly, type=int, help='Enable querying CryptoCompare for coin prices. 0 for no price, 1 for daily price, 2 for hourly price.') def stream(last_synced_block_file, lag, provider_uri, output, start_block, chain=Chain.BITCOIN, period_seconds=1, batch_size=1, block_batch_size=10, max_workers=5, log_file=None, pid_file=None, - enrich=True, coin_price_type=CoinPriceType.hourly): + enrich=True, retry_errors=True, coin_price_type=CoinPriceType.hourly): """Streams all data types to console or Google Pub/Sub.""" configure_logging(log_file) configure_signals() @@ -80,5 +81,9 @@ def stream(last_synced_block_file, lag, provider_uri, output, start_block, chain period_seconds=period_seconds, block_batch_size=block_batch_size, pid_file=pid_file, + retry_errors=retry_errors ) streamer.stream() + + + diff --git a/blockchainetl/jobs/exporters/google_pubsub_item_exporter.py b/blockchainetl/jobs/exporters/google_pubsub_item_exporter.py index 0609389..c20f246 100644 --- a/blockchainetl/jobs/exporters/google_pubsub_item_exporter.py +++ b/blockchainetl/jobs/exporters/google_pubsub_item_exporter.py @@ -74,7 +74,7 @@ def close(self): def create_publisher(): batch_settings = pubsub_v1.types.BatchSettings( - max_bytes=1024 * 10, # 5 kilobytes + max_bytes=1024 * 50 * 1024, # 50 kilobytes max_latency=0.01, # 1 second max_messages=10000, ) diff --git a/blockchainetl/streaming/streamer.py b/blockchainetl/streaming/streamer.py index aa8f818..1d42308 100644 --- a/blockchainetl/streaming/streamer.py +++ b/blockchainetl/streaming/streamer.py @@ -24,6 +24,7 @@ import logging import os import time +from google.api_core.exceptions import InvalidArgument from blockchainetl.streaming.streamer_adapter_stub import StreamerAdapterStub from blockchainetl.file_utils import smart_open @@ -95,7 +96,12 @@ def _sync_cycle(self): current_block, target_block, self.last_synced_block, blocks_to_sync)) if blocks_to_sync != 0: - self.blockchain_streamer_adapter.export_all(self.last_synced_block + 1, target_block) + + try: + self.blockchain_streamer_adapter.export_all(self.last_synced_block + 1, target_block) + except InvalidArgument as e: + logging.exception(f"An exception occurred while syncing block data - InvalidArgument, ERROR = {e.message}") + logging.info('Writing last synced block {}'.format(target_block)) write_last_synced_block(self.last_synced_block_file, target_block) self.last_synced_block = target_block From a0508e7e374fe64e95568afac6b2f366fa49a8c7 Mon Sep 17 00:00:00 2001 From: saurabhdaga-merkle Date: Tue, 6 Jul 2021 15:40:32 +0530 Subject: [PATCH 21/51] fixes --- README_CONTAINER_REGISTRY.md | 2 +- bitcoinetl/streaming/btc_streamer_adapter.py | 1 - blockchainetl/cryptocompare.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/README_CONTAINER_REGISTRY.md b/README_CONTAINER_REGISTRY.md index a5a46fc..00b9581 100644 --- a/README_CONTAINER_REGISTRY.md +++ b/README_CONTAINER_REGISTRY.md @@ -1,4 +1,4 @@ -BITCOINETL_STREAMING_VERSION=1.3.8-streaming-test +BITCOINETL_STREAMING_VERSION=1.3.8-streaming docker build -t merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} -f Dockerfile_with_streaming . docker tag merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} us.gcr.io/staging-btc-etl/merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} docker push us.gcr.io/staging-btc-etl/merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} diff --git a/bitcoinetl/streaming/btc_streamer_adapter.py b/bitcoinetl/streaming/btc_streamer_adapter.py index 1ef0ea2..a3e475c 100644 --- a/bitcoinetl/streaming/btc_streamer_adapter.py +++ b/bitcoinetl/streaming/btc_streamer_adapter.py @@ -97,7 +97,6 @@ def export_all(self, start_block, end_block): logging.info('Exporting with ' + type(self.item_exporter).__name__) logging.info('Block number ' + str(len(blocks))) - logging.info('Blocks ' + str(blocks)) logging.info('Transaction length ' + str(len(transactions))) self.item_exporter.export_items(blocks + transactions) diff --git a/blockchainetl/cryptocompare.py b/blockchainetl/cryptocompare.py index 572bc4d..842ba3b 100644 --- a/blockchainetl/cryptocompare.py +++ b/blockchainetl/cryptocompare.py @@ -27,7 +27,7 @@ from datetime import datetime, timedelta -CRYPTOCOMPARE_API_KEY = os.getenv("CRYPTOCOMPARE_API_KEY", "12e40a21ca4933874e0242bf0fde2b5d1f99304ca431dca5d541f3be03c0ef66") +CRYPTOCOMPARE_API_KEY = os.getenv("CRYPTOCOMPARE_API_KEY", "") class CryptoCompareRequestException(Exception): From 6b8a8cfd46436476022b30e5bb37f7fc98a77d61 Mon Sep 17 00:00:00 2001 From: saurabhdaga-merkle Date: Wed, 7 Jul 2021 14:17:42 +0530 Subject: [PATCH 22/51] fix decimal places --- .gitignore | 3 +++ blockchainetl/cryptocompare.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index ed2a59a..2956e43 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,6 @@ venv/ ENV/ .idea/ + +*.json +*.txt \ No newline at end of file diff --git a/blockchainetl/cryptocompare.py b/blockchainetl/cryptocompare.py index 842ba3b..5a9e88f 100644 --- a/blockchainetl/cryptocompare.py +++ b/blockchainetl/cryptocompare.py @@ -122,4 +122,4 @@ def get_coin_price( data = payload["Data"]["Data"] avg_price = sum(item["open"] for item in data) / len(data) - return round(avg_price, 2) + return round(avg_price, 8) From c77ba348c8bfadb95bf099d6569899069520db61 Mon Sep 17 00:00:00 2001 From: saurabhdaga-merkle <55530487+saurabhdaga-merkle@users.noreply.github.com> Date: Wed, 11 Aug 2021 15:57:20 +0530 Subject: [PATCH 23/51] Fix for bsv (#9) remove unnecessary verbose fields "script_asm" and "script_hex" --- bitcoinetl/mappers/transaction_input_mapper.py | 12 ++++++------ bitcoinetl/mappers/transaction_output_mapper.py | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/bitcoinetl/mappers/transaction_input_mapper.py b/bitcoinetl/mappers/transaction_input_mapper.py index 468ed04..3ec0120 100644 --- a/bitcoinetl/mappers/transaction_input_mapper.py +++ b/bitcoinetl/mappers/transaction_input_mapper.py @@ -48,8 +48,8 @@ def json_dict_to_input(self, json_dict, spending_transaction_id=None): input.sequence = json_dict.get('sequence') if 'scriptSig' in json_dict: - input.script_asm = (json_dict.get('scriptSig')).get('asm') - input.script_hex = (json_dict.get('scriptSig')).get('hex') + input.script_asm = '' #(json_dict.get('scriptSig')).get('asm') + input.script_hex = '' #(json_dict.get('scriptSig')).get('hex') return input @@ -63,8 +63,8 @@ def inputs_to_dicts(self, inputs): 'create_output_index': input.create_output_index, 'sequence': input.sequence, - 'script_asm': input.script_asm, - 'script_hex': input.script_hex, + 'script_asm': '' #input.script_asm, + 'script_hex': '' #input.script_hex, 'required_signatures': input.required_signatures, 'addresses': input.addresses, @@ -83,8 +83,8 @@ def dicts_to_inputs(self, dicts): input.index = dict.get('index') input.create_transaction_id = dict.get('create_transaction_id') input.create_output_index = dict.get('create_output_index') - input.script_asm = dict.get('script_asm') - input.script_hex = dict.get('script_hex') + input.script_asm = '' #dict.get('script_asm') + input.script_hex = '' #dict.get('script_hex') input.sequence = dict.get('sequence') input.required_signatures = dict.get('required_signatures') input.type = dict.get('type') diff --git a/bitcoinetl/mappers/transaction_output_mapper.py b/bitcoinetl/mappers/transaction_output_mapper.py index 3315917..2379f44 100644 --- a/bitcoinetl/mappers/transaction_output_mapper.py +++ b/bitcoinetl/mappers/transaction_output_mapper.py @@ -44,8 +44,8 @@ def json_dict_to_output(self, json_dict, create_transaction_id=None): if 'scriptPubKey' in json_dict: script_pub_key = json_dict.get('scriptPubKey') - output.script_asm = script_pub_key.get('asm') - output.script_hex = script_pub_key.get('hex') + output.script_asm = '' #script_pub_key.get('asm') + output.script_hex = '' #script_pub_key.get('hex') output.required_signatures = script_pub_key.get('reqSigs') output.type = script_pub_key.get('type') output.addresses = script_pub_key.get('addresses') @@ -60,8 +60,8 @@ def outputs_to_dicts(self, outputs): 'create_transaction_id': output.create_transaction_id, 'spending_transaction_id': None, - 'script_asm': output.script_asm, - 'script_hex': output.script_hex, + 'script_asm': '' #output.script_asm, + 'script_hex': '' #output.script_hex, 'type': output.type, 'addresses': output.addresses, @@ -79,8 +79,8 @@ def dicts_to_outputs(self, dicts): for dict in dicts: input = BtcTransactionOutput() input.index = dict.get('index') - input.script_asm = dict.get('script_asm') - input.script_hex = dict.get('script_hex') + input.script_asm = '' #dict.get('script_asm') + input.script_hex = '' #dict.get('script_hex') input.required_signatures = dict.get('required_signatures') input.type = dict.get('type') input.addresses = dict.get('addresses') From 63cce73705101550e886c0ff986991b3e9a308e4 Mon Sep 17 00:00:00 2001 From: saurabhdaga-merkle <55530487+saurabhdaga-merkle@users.noreply.github.com> Date: Wed, 11 Aug 2021 16:38:16 +0530 Subject: [PATCH 24/51] Fix for bsv (#10) add missing ',' --- bitcoinetl/mappers/transaction_input_mapper.py | 4 ++-- bitcoinetl/mappers/transaction_output_mapper.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bitcoinetl/mappers/transaction_input_mapper.py b/bitcoinetl/mappers/transaction_input_mapper.py index 3ec0120..69fe0f6 100644 --- a/bitcoinetl/mappers/transaction_input_mapper.py +++ b/bitcoinetl/mappers/transaction_input_mapper.py @@ -63,8 +63,8 @@ def inputs_to_dicts(self, inputs): 'create_output_index': input.create_output_index, 'sequence': input.sequence, - 'script_asm': '' #input.script_asm, - 'script_hex': '' #input.script_hex, + 'script_asm': '', #input.script_asm + 'script_hex': '', #input.script_hex 'required_signatures': input.required_signatures, 'addresses': input.addresses, diff --git a/bitcoinetl/mappers/transaction_output_mapper.py b/bitcoinetl/mappers/transaction_output_mapper.py index 2379f44..08848af 100644 --- a/bitcoinetl/mappers/transaction_output_mapper.py +++ b/bitcoinetl/mappers/transaction_output_mapper.py @@ -60,8 +60,8 @@ def outputs_to_dicts(self, outputs): 'create_transaction_id': output.create_transaction_id, 'spending_transaction_id': None, - 'script_asm': '' #output.script_asm, - 'script_hex': '' #output.script_hex, + 'script_asm': '', #output.script_asm + 'script_hex': '', #output.script_hex 'type': output.type, 'addresses': output.addresses, From b109ee2e1c195a960b600bc08f5212c0ad2feaae Mon Sep 17 00:00:00 2001 From: saurabhdaga-merkle Date: Thu, 26 Aug 2021 15:59:37 +0530 Subject: [PATCH 25/51] updated readme --- README_CONTAINER_REGISTRY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_CONTAINER_REGISTRY.md b/README_CONTAINER_REGISTRY.md index 00b9581..da49514 100644 --- a/README_CONTAINER_REGISTRY.md +++ b/README_CONTAINER_REGISTRY.md @@ -1,4 +1,4 @@ -BITCOINETL_STREAMING_VERSION=1.3.8-streaming +BITCOINETL_STREAMING_VERSION=1.4-streaming docker build -t merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} -f Dockerfile_with_streaming . docker tag merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} us.gcr.io/staging-btc-etl/merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} docker push us.gcr.io/staging-btc-etl/merklescience/bitcoin-etl:${BITCOINETL_STREAMING_VERSION} From 21537dad3872d34e96cbfd65abadd0d7f95d5ecc Mon Sep 17 00:00:00 2001 From: saurabhdaga-merkle Date: Thu, 26 Aug 2021 17:18:03 +0530 Subject: [PATCH 26/51] updated values --- blockchainetl/jobs/exporters/google_pubsub_item_exporter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockchainetl/jobs/exporters/google_pubsub_item_exporter.py b/blockchainetl/jobs/exporters/google_pubsub_item_exporter.py index 497df13..a0676d0 100644 --- a/blockchainetl/jobs/exporters/google_pubsub_item_exporter.py +++ b/blockchainetl/jobs/exporters/google_pubsub_item_exporter.py @@ -30,7 +30,7 @@ class GooglePubSubItemExporter: def __init__(self, item_type_to_topic_mapping, message_attributes=(), - batch_max_bytes=1024 * 5, batch_max_latency=0.01, batch_max_messages=10000): + batch_max_bytes=1024 * 5, batch_max_latency=0.01, batch_max_messages=1000): self.item_type_to_topic_mapping = item_type_to_topic_mapping self.batch_max_bytes = batch_max_bytes From a9be6f273230fec20d5a1178c92637445e6a9daf Mon Sep 17 00:00:00 2001 From: Prasanna Date: Thu, 16 Dec 2021 13:25:47 +0530 Subject: [PATCH 27/51] taproot testing from upstream --- bitcoinetl/mappers/transaction_output_mapper.py | 6 +++++- bitcoinetl/service/btc_service.py | 4 +++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/bitcoinetl/mappers/transaction_output_mapper.py b/bitcoinetl/mappers/transaction_output_mapper.py index 08848af..cb0e1ce 100644 --- a/bitcoinetl/mappers/transaction_output_mapper.py +++ b/bitcoinetl/mappers/transaction_output_mapper.py @@ -48,7 +48,11 @@ def json_dict_to_output(self, json_dict, create_transaction_id=None): output.script_hex = '' #script_pub_key.get('hex') output.required_signatures = script_pub_key.get('reqSigs') output.type = script_pub_key.get('type') - output.addresses = script_pub_key.get('addresses') + #output.addresses = script_pub_key.get('addresses') + if script_pub_key.get('address') is None: + output.addresses = [] + else: + output.addresses = [script_pub_key.get('address')] return output diff --git a/bitcoinetl/service/btc_service.py b/bitcoinetl/service/btc_service.py index c2bcc44..a72d1ca 100644 --- a/bitcoinetl/service/btc_service.py +++ b/bitcoinetl/service/btc_service.py @@ -178,7 +178,9 @@ def _remove_coinbase_input(self, block): def _add_non_standard_addresses(self, transaction): for output in transaction.outputs: if output.addresses is None or len(output.addresses) == 0: - output.type = 'nonstandard' + # output.type = 'nonstandard' + if output.type != 'multisig': + output.type = 'nonstandard' output.addresses = [script_hex_to_non_standard_address(output.script_hex)] def _add_shielded_inputs_and_outputs(self, transaction): From 29406372c1853535d201a59f14ef1e61e41c42e1 Mon Sep 17 00:00:00 2001 From: Akshay Gupta Bursainya <48746146+akshay-ghy@users.noreply.github.com> Date: Thu, 16 Dec 2021 17:42:35 +0530 Subject: [PATCH 28/51] chunking publish to pubsub (#11) --- .../exporters/google_pubsub_item_exporter.py | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/blockchainetl/jobs/exporters/google_pubsub_item_exporter.py b/blockchainetl/jobs/exporters/google_pubsub_item_exporter.py index a0676d0..4ff13bd 100644 --- a/blockchainetl/jobs/exporters/google_pubsub_item_exporter.py +++ b/blockchainetl/jobs/exporters/google_pubsub_item_exporter.py @@ -45,16 +45,21 @@ def open(self): pass def export_items(self, items): - try: - self._export_items_with_timeout(items) - except timeout_decorator.TimeoutError as e: - # A bug in PubSub publisher that makes it stalled after running for some time. - # Exception in thread Thread-CommitBatchPublisher: - # details = "channel is in state TRANSIENT_FAILURE" - # https://stackoverflow.com/questions/55552606/how-can-one-catch-exceptions-in-python-pubsub-subscriber-that-are-happening-in-i?noredirect=1#comment97849067_55552606 - logging.info('Recreating Pub/Sub publisher.') - self.publisher = self.create_publisher() - raise e + tot_steps = (len(items) // 1000) + 1 + logging.info('Total publish loop steps'+str(tot_steps)) + for i in range(0, len(items), 1000): + mini_batch = items[i:i + 1000] + logging.info('Current Loop Iteration' + str(i + 1)+ 'out of'+str(tot_steps)) + try: + self._export_items_with_timeout(mini_batch) + except timeout_decorator.TimeoutError as e: + # A bug in PubSub publisher that makes it stalled after running for some time. + # Exception in thread Thread-CommitBatchPublisher: + # details = "channel is in state TRANSIENT_FAILURE" + # https://stackoverflow.com/questions/55552606/how-can-one-catch-exceptions-in-python-pubsub-subscriber-that-are-happening-in-i?noredirect=1#comment97849067_55552606 + logging.info('Recreating Pub/Sub publisher.') + self.publisher = self.create_publisher() + raise e @timeout_decorator.timeout(300) def _export_items_with_timeout(self, items): From bcc931b4f3f7c4aaeade6510b336389ce462520c Mon Sep 17 00:00:00 2001 From: Prasanna Date: Thu, 16 Dec 2021 19:36:37 +0530 Subject: [PATCH 29/51] hot fix for taproot --- bitcoinetl/mappers/transaction_output_mapper.py | 8 +++++--- bitcoinetl/service/btc_service.py | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/bitcoinetl/mappers/transaction_output_mapper.py b/bitcoinetl/mappers/transaction_output_mapper.py index cb0e1ce..edd0361 100644 --- a/bitcoinetl/mappers/transaction_output_mapper.py +++ b/bitcoinetl/mappers/transaction_output_mapper.py @@ -49,10 +49,12 @@ def json_dict_to_output(self, json_dict, create_transaction_id=None): output.required_signatures = script_pub_key.get('reqSigs') output.type = script_pub_key.get('type') #output.addresses = script_pub_key.get('addresses') - if script_pub_key.get('address') is None: - output.addresses = [] + if script_pub_key.get('addresses') is not None and len(script_pub_key.get('addresses')) > 0: + output.addresses = script_pub_key.get('addresses') + elif script_pub_key.get('address') is None: + output.addresses = [] else: - output.addresses = [script_pub_key.get('address')] + output.addresses = [script_pub_key.get('address')] return output diff --git a/bitcoinetl/service/btc_service.py b/bitcoinetl/service/btc_service.py index a72d1ca..fca484c 100644 --- a/bitcoinetl/service/btc_service.py +++ b/bitcoinetl/service/btc_service.py @@ -179,8 +179,8 @@ def _add_non_standard_addresses(self, transaction): for output in transaction.outputs: if output.addresses is None or len(output.addresses) == 0: # output.type = 'nonstandard' - if output.type != 'multisig': - output.type = 'nonstandard' + # if output.type != 'multisig': + output.type = 'nonstandard' output.addresses = [script_hex_to_non_standard_address(output.script_hex)] def _add_shielded_inputs_and_outputs(self, transaction): From a37aefa066297a3b8fdb400f6cd2d05e2e22eef2 Mon Sep 17 00:00:00 2001 From: Akshay Gupta Bursainya <48746146+akshay-ghy@users.noreply.github.com> Date: Sun, 26 Jun 2022 16:40:22 +0530 Subject: [PATCH 30/51] Remove coin_price_usd (#7) --- bitcoinetl/cli/stream.py | 9 +- bitcoinetl/domain/block.py | 1 - bitcoinetl/domain/transaction.py | 1 - .../blocks_and_transactions_item_exporter.py | 2 - bitcoinetl/mappers/block_mapper.py | 2 - bitcoinetl/mappers/transaction_mapper.py | 4 - bitcoinetl/service/btc_service.py | 39 ------ bitcoinetl/streaming/btc_streamer_adapter.py | 9 +- blockchainetl/cryptocompare.py | 125 ------------------ 9 files changed, 5 insertions(+), 187 deletions(-) delete mode 100644 blockchainetl/cryptocompare.py diff --git a/bitcoinetl/cli/stream.py b/bitcoinetl/cli/stream.py index 2384984..fb9df06 100644 --- a/bitcoinetl/cli/stream.py +++ b/bitcoinetl/cli/stream.py @@ -22,7 +22,7 @@ import click -from bitcoinetl.enumeration.chain import Chain, CoinPriceType +from bitcoinetl.enumeration.chain import Chain from bitcoinetl.rpc.bitcoin_rpc import BitcoinRpc from blockchainetl.logging_utils import logging_basic_config @@ -51,11 +51,9 @@ @click.option('--pid-file', default=None, type=str, help='pid file.') @click.option('--enrich', default=True, type=bool, help='Enable filling in transactions inputs fields.') @click.option('--retry_errors', default=True, type=bool, help='Enable Retry on streaming failures') -@click.option('--coin-price-type', default=CoinPriceType.hourly, type=int, - help='Enable querying CryptoCompare for coin prices. 0 for no price, 1 for daily price, 2 for hourly price.') def stream(last_synced_block_file, lag, provider_uri, output, start_block, chain=Chain.BITCOIN, period_seconds=1, batch_size=1, block_batch_size=10, max_workers=5, log_file=None, pid_file=None, - enrich=True, retry_errors=True, coin_price_type=CoinPriceType.hourly): + enrich=True, retry_errors=True): """Streams all data types to console or Google Pub/Sub.""" configure_logging(log_file) configure_signals() @@ -84,6 +82,3 @@ def stream(last_synced_block_file, lag, provider_uri, output, start_block, chain retry_errors=retry_errors ) streamer.stream() - - - diff --git a/bitcoinetl/domain/block.py b/bitcoinetl/domain/block.py index 5e5ee7d..dab89f2 100644 --- a/bitcoinetl/domain/block.py +++ b/bitcoinetl/domain/block.py @@ -54,7 +54,6 @@ def __init__(self): self.block_reward = None self.transaction_fees = None - self.coin_price_usd = None self.coinbase_txid = None self.coinbase_param_decoded = None diff --git a/bitcoinetl/domain/transaction.py b/bitcoinetl/domain/transaction.py index de66cad..6ae4b75 100644 --- a/bitcoinetl/domain/transaction.py +++ b/bitcoinetl/domain/transaction.py @@ -50,7 +50,6 @@ def __init__(self): self.input_value = None self.output_count = None self.output_value = None - self.coin_price_usd = None def add_input(self, input): if len(self.inputs) > 0: diff --git a/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py b/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py index 243d20b..d8e1773 100644 --- a/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py +++ b/bitcoinetl/jobs/exporters/blocks_and_transactions_item_exporter.py @@ -46,7 +46,6 @@ "transaction_count", "block_reward", "transaction_ids", - "coin_price_usd", ] @@ -69,7 +68,6 @@ 'output_value', 'inputs', 'outputs', - 'coin_price_usd', ] diff --git a/bitcoinetl/mappers/block_mapper.py b/bitcoinetl/mappers/block_mapper.py index d72e7f5..59bcabf 100644 --- a/bitcoinetl/mappers/block_mapper.py +++ b/bitcoinetl/mappers/block_mapper.py @@ -68,7 +68,6 @@ def json_dict_to_block(self, json_dict): block.chain_work = json_dict.get("chainwork") block.coinbase_txid = json_dict.get("coinbase_txid") block.previous_block_hash = json_dict.get("previousblockhash") - block.coin_price_usd = json_dict.get('coin_price_usd') return block def block_to_dict(self, block): @@ -94,7 +93,6 @@ def block_to_dict(self, block): 'difficulty': block.difficulty, 'chain_work': block.chain_work, 'previous_block_hash': block.previous_block_hash, - "coin_price_usd": block.coin_price_usd, "transaction_ids": block.transaction_ids } diff --git a/bitcoinetl/mappers/transaction_mapper.py b/bitcoinetl/mappers/transaction_mapper.py index c1094b6..65f6d89 100644 --- a/bitcoinetl/mappers/transaction_mapper.py +++ b/bitcoinetl/mappers/transaction_mapper.py @@ -72,8 +72,6 @@ def json_dict_to_transaction(self, json_dict, block=None, index=None, coin_price transaction.join_splits = self.join_split_mapper.vjoinsplit_to_join_splits(json_dict.get('vjoinsplit')) transaction.value_balance = bitcoin_to_satoshi(json_dict.get('valueBalance')) - # New fields - transaction.coin_price_usd = coin_price_usd transaction.weight = json_dict.get('weight') transaction.output_addresses = self.get_output_addresses(transaction) return transaction @@ -104,7 +102,6 @@ def transaction_to_dict(self, transaction): 'input_value': transaction.calculate_input_value(), 'output_value': transaction.calculate_output_value(), 'fee': transaction.calculate_fee(), - 'coin_price_usd': transaction.coin_price_usd, 'weight': transaction.weight, 'output_addresses': transaction.output_addresses } @@ -123,7 +120,6 @@ def dict_to_transaction(self, dict): transaction.block_timestamp = dict.get('block_timestamp') transaction.is_coinbase = dict.get('is_coinbase') transaction.index = dict.get('index') - transaction.coin_price_usd = dict.get('coin_price_usd') transaction.weight = dict.get('weight') transaction.output_addresses = dict.get('output_addresses') transaction.input_addresses = dict.get('input_addresses') diff --git a/bitcoinetl/service/btc_service.py b/bitcoinetl/service/btc_service.py index fca484c..c868f0e 100644 --- a/bitcoinetl/service/btc_service.py +++ b/bitcoinetl/service/btc_service.py @@ -89,7 +89,6 @@ def get_blocks_by_hashes(self, block_hash_batch, with_transactions=True): if block.has_full_transactions(): for transaction in block.transactions: - self._add_coin_price_to_transaction(transaction, block.coin_price_usd) self._add_non_standard_addresses(transaction) if self.chain == Chain.ZCASH: self._add_shielded_inputs_and_outputs(transaction) @@ -213,42 +212,4 @@ def _add_shielded_inputs_and_outputs(self, transaction): def get_block_reward(self, block): return block.coinbase_tx.calculate_output_value() - def _add_coin_price_to_blocks(self, blocks, coin_price_type): - from_currency_code = Chain.ticker_symbol(self.chain) - - if not from_currency_code or coin_price_type == CoinPriceType.empty: - return - - elif coin_price_type == CoinPriceType.hourly: - block_hour_ids = list(set([get_hour_id_from_ts(block.timestamp) for block in blocks])) - block_hours_ts = {hour_id: get_ts_from_hour_id(hour_id) for hour_id in block_hour_ids} - - for hour_id, hour_ts in block_hours_ts.items(): - if hour_id in self.cached_prices: - continue - - self.cached_prices[hour_id] = get_coin_price(from_currency_code=from_currency_code, timestamp=hour_ts, resource="histohour") - - for block in blocks: - block_hour_id = get_hour_id_from_ts(block.timestamp) - block.coin_price_usd = self.cached_prices[block_hour_id] - - elif coin_price_type == CoinPriceType.daily: - block_day_ids = list(set([get_day_id_from_ts(block.timestamp) for block in blocks])) - block_days_ts = {day_id: get_ts_from_day_id(day_id) for day_id in block_day_ids} - - for day_id, day_ts in block_days_ts.items(): - if day_id in self.cached_prices: - continue - - self.cached_prices[day_id] = get_coin_price(from_currency_code=from_currency_code, timestamp=day_ts, resource="histoday") - - for block in blocks: - block_day_id = get_day_id_from_ts(block.timestamp) - block.coin_price_usd = self.cached_prices[block_day_id] - - def _add_coin_price_to_transaction(self, transaction, coin_price_usd): - transaction.coin_price_usd = coin_price_usd - - ADDRESS_TYPE_SHIELDED = 'shielded' diff --git a/bitcoinetl/streaming/btc_streamer_adapter.py b/bitcoinetl/streaming/btc_streamer_adapter.py index e8c514c..81c9e8d 100644 --- a/bitcoinetl/streaming/btc_streamer_adapter.py +++ b/bitcoinetl/streaming/btc_streamer_adapter.py @@ -23,7 +23,7 @@ import logging -from bitcoinetl.enumeration.chain import Chain, CoinPriceType +from bitcoinetl.enumeration.chain import Chain from bitcoinetl.jobs.enrich_transactions import EnrichTransactionsJob from bitcoinetl.jobs.export_blocks_job import ExportBlocksJob from bitcoinetl.service.btc_service import BtcService @@ -40,8 +40,7 @@ def __init__( chain=Chain.BITCOIN, batch_size=2, enable_enrich=True, - max_workers=5, - coin_price_type=CoinPriceType.empty): + max_workers=5): self.bitcoin_rpc = bitcoin_rpc self.chain = chain self.btc_service = BtcService(bitcoin_rpc, chain) @@ -49,7 +48,6 @@ def __init__( self.batch_size = batch_size self.enable_enrich = enable_enrich self.max_workers = max_workers - self.coin_price_type = coin_price_type self.item_id_calculator = BtcItemIdCalculator() def open(self): @@ -71,8 +69,7 @@ def export_all(self, start_block, end_block): item_exporter=blocks_and_transactions_item_exporter, chain=self.chain, export_blocks=True, - export_transactions=True, - coin_price_type=self.coin_price_type + export_transactions=True ) blocks_and_transactions_job.run() diff --git a/blockchainetl/cryptocompare.py b/blockchainetl/cryptocompare.py deleted file mode 100644 index 5a9e88f..0000000 --- a/blockchainetl/cryptocompare.py +++ /dev/null @@ -1,125 +0,0 @@ -# MIT License -# -# Copyright (c) 2019 Nirmal AK, nirmal@merklescience.com -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import os -import requests -from time import time -from math import floor -from datetime import datetime, timedelta - - -CRYPTOCOMPARE_API_KEY = os.getenv("CRYPTOCOMPARE_API_KEY", "") - - -class CryptoCompareRequestException(Exception): - pass - - -def get_hour_id_from_ts(timestamp: int) -> int: - """ - returns the number of hours elapsed since 1st Jan 2000 - """ - base_ts = datetime(2000, 1, 1).timestamp() - seconds_to_hour = 60 * 60 - return floor((int(timestamp) - base_ts) / seconds_to_hour) - - -def get_day_id_from_ts(timestamp: int) -> int: - """ - returns the number of days elapsed since 1st Jan 2000 - """ - base_ts = datetime(2000, 1, 1).timestamp() - seconds_to_day = 60 * 60 * 24 - return floor((int(timestamp) - base_ts) / seconds_to_day) - - -def get_ts_from_hour_id(hour_id: int) -> int: - base_date = datetime(2000, 1, 1) - reference_date = base_date + timedelta(hours=hour_id) - return floor(reference_date.timestamp()) - - -def get_ts_from_day_id(day_id: int) -> int: - base_date = datetime(2000, 1, 1) - reference_date = base_date + timedelta(days=day_id) - return floor(reference_date.timestamp()) - - -def _make_request( - resource: str, - from_currency_code: str, - to_currency_code: str, - timestamp: int, - access_token: str, - exchange_code: str, - num_records: int, - api_version: str - ) -> requests.Response: - """ - API documentation for cryptocompare can be found at https://min-api.cryptocompare.com/documentation - """ - base_url = f"https://min-api.cryptocompare.com/data/{api_version}/{resource}" - params = { - "fsym": from_currency_code, - "tsym": to_currency_code, - "e": exchange_code, - "limit": num_records, - "toTs": timestamp, - "api_key": access_token - } - return requests.get(base_url, params=params) - - -def get_coin_price( - from_currency_code: str, - timestamp: int, - resource="histohour", - to_currency_code: str="USD", - exchange_code: str="CCCAGG", - num_records: int=1, - api_version: str ="v2", - access_token: str=CRYPTOCOMPARE_API_KEY, - ): - """ - Prices are retrieved from hourly price resource as prices - are available for historical data from when available - """ - response = _make_request( - resource=resource, - from_currency_code=from_currency_code, - to_currency_code=to_currency_code, - timestamp=int(timestamp), - access_token=access_token, - exchange_code=exchange_code, - num_records=num_records, - api_version=api_version, - ) - if not response.status_code == 200: - raise CryptoCompareRequestException - - payload = response.json() - if payload["Type"] != 100: - raise CryptoCompareRequestException(payload.get("Message", "") + access_token) - - data = payload["Data"]["Data"] - avg_price = sum(item["open"] for item in data) / len(data) - return round(avg_price, 8) From a6d645e2a161da27779ea4514096bdc09228874e Mon Sep 17 00:00:00 2001 From: saurabhdaga-merkle <55530487+saurabhdaga-merkle@users.noreply.github.com> Date: Sun, 26 Jun 2022 16:42:45 +0530 Subject: [PATCH 31/51] Remove coin price (#18) --- bitcoinetl/cli/stream.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bitcoinetl/cli/stream.py b/bitcoinetl/cli/stream.py index fb9df06..b5ec94f 100644 --- a/bitcoinetl/cli/stream.py +++ b/bitcoinetl/cli/stream.py @@ -69,7 +69,6 @@ def stream(last_synced_block_file, lag, provider_uri, output, start_block, chain batch_size=batch_size, enable_enrich=enrich, max_workers=max_workers, - coin_price_type=coin_price_type, ) streamer = Streamer( blockchain_streamer_adapter=streamer_adapter, From bf8bf26f5319f18d4354c5e676e290799ae93b2e Mon Sep 17 00:00:00 2001 From: saurabhdaga-merkle <55530487+saurabhdaga-merkle@users.noreply.github.com> Date: Sun, 26 Jun 2022 16:56:14 +0530 Subject: [PATCH 32/51] Remove coin price (#19) --- bitcoinetl/cli/export_all.py | 7 ++----- bitcoinetl/cli/export_blocks_and_transactions.py | 6 +----- bitcoinetl/jobs/export_all.py | 2 -- bitcoinetl/jobs/export_blocks_job.py | 3 +-- bitcoinetl/mappers/transaction_mapper.py | 2 +- bitcoinetl/service/btc_service.py | 12 +----------- 6 files changed, 6 insertions(+), 26 deletions(-) diff --git a/bitcoinetl/cli/export_all.py b/bitcoinetl/cli/export_all.py index 2628e8a..9ef1a7b 100644 --- a/bitcoinetl/cli/export_all.py +++ b/bitcoinetl/cli/export_all.py @@ -96,10 +96,7 @@ def get_partitions(start, end, partition_batch_size, provider_uri): @click.option('-c', '--chain', default=Chain.BITCOIN, type=click.Choice(Chain.ALL), help='The type of chain.') @click.option('--enrich', default=False, type=bool, help='Enable filling in transactions inputs fields.') -@click.option('--coin-price-type', default=CoinPriceType.empty, type=int, - help='Enable querying CryptoCompare for coin prices. 0 for no price, 1 for daily price, 2 for hourly price.') -def export_all(start, end, partition_batch_size, provider_uri, output_dir, max_workers, export_batch_size, chain, enrich, coin_price_type): +def export_all(start, end, partition_batch_size, provider_uri, output_dir, max_workers, export_batch_size, chain, enrich): """Exports all data for a range of blocks.""" do_export_all(chain, get_partitions(start, end, partition_batch_size, provider_uri), - output_dir, provider_uri, max_workers, export_batch_size, enrich, - coin_price_type) + output_dir, provider_uri, max_workers, export_batch_size, enrich) diff --git a/bitcoinetl/cli/export_blocks_and_transactions.py b/bitcoinetl/cli/export_blocks_and_transactions.py index 9d75e08..6c918dc 100644 --- a/bitcoinetl/cli/export_blocks_and_transactions.py +++ b/bitcoinetl/cli/export_blocks_and_transactions.py @@ -48,11 +48,8 @@ 'If not provided transactions will not be exported. Use "-" for stdout') @click.option('-c', '--chain', default=Chain.BITCOIN, type=click.Choice(Chain.ALL), help='The type of chain') -@click.option('--coin-price-type', default=CoinPriceType.empty, type=int, - help='Enable querying CryptoCompare for coin prices. 0 for no price, 1 for daily price, 2 for hourly price.') def export_blocks_and_transactions(start_block, end_block, batch_size, provider_uri, - max_workers, blocks_output, transactions_output, chain, - coin_price_type): + max_workers, blocks_output, transactions_output, chain,): """Export blocks and transactions.""" if blocks_output is None and transactions_output is None: raise ValueError('Either --blocks-output or --transactions-output options must be provided') @@ -67,6 +64,5 @@ def export_blocks_and_transactions(start_block, end_block, batch_size, provider_ chain=chain, export_blocks=blocks_output is not None, export_transactions=transactions_output is not None, - coin_price_type=coin_price_type ) job.run() diff --git a/bitcoinetl/jobs/export_all.py b/bitcoinetl/jobs/export_all.py index e3a75d2..c86e94d 100644 --- a/bitcoinetl/jobs/export_all.py +++ b/bitcoinetl/jobs/export_all.py @@ -42,7 +42,6 @@ def export_all( chain, partitions, output_dir, provider_uri, max_workers, batch_size, enrich, - coin_price_type ): for batch_start_block, batch_end_block, partition_dir, *args in partitions: # # # start # # # @@ -105,7 +104,6 @@ def export_all( item_exporter=blocks_and_transactions_item_exporter(blocks_file, transactions_file), export_blocks=blocks_file is not None, export_transactions=transactions_file is not None, - coin_price_type=coin_price_type, ) job.run() diff --git a/bitcoinetl/jobs/export_blocks_job.py b/bitcoinetl/jobs/export_blocks_job.py index e19b2a1..aab6bf6 100644 --- a/bitcoinetl/jobs/export_blocks_job.py +++ b/bitcoinetl/jobs/export_blocks_job.py @@ -42,8 +42,7 @@ def __init__( item_exporter, chain, export_blocks=True, - export_transactions=True, - coin_price_type=CoinPriceType.empty): + export_transactions=True): validate_range(start_block, end_block) self.start_block = start_block diff --git a/bitcoinetl/mappers/transaction_mapper.py b/bitcoinetl/mappers/transaction_mapper.py index 65f6d89..14899ba 100644 --- a/bitcoinetl/mappers/transaction_mapper.py +++ b/bitcoinetl/mappers/transaction_mapper.py @@ -36,7 +36,7 @@ def __init__(self): self.transaction_output_mapper = BtcTransactionOutputMapper() self.join_split_mapper = BtcJoinSplitMapper() - def json_dict_to_transaction(self, json_dict, block=None, index=None, coin_price_usd=None): + def json_dict_to_transaction(self, json_dict, block=None, index=None): transaction = BtcTransaction() transaction.hash = json_dict.get('hash') transaction.size = json_dict.get('size') diff --git a/bitcoinetl/service/btc_service.py b/bitcoinetl/service/btc_service.py index c868f0e..e59b1dd 100644 --- a/bitcoinetl/service/btc_service.py +++ b/bitcoinetl/service/btc_service.py @@ -30,22 +30,14 @@ from bitcoinetl.service.btc_script_service import script_hex_to_non_standard_address from bitcoinetl.service.genesis_transactions import GENESIS_TRANSACTIONS from blockchainetl.utils import rpc_response_batch_to_results, dynamic_batch_iterator -from blockchainetl.cryptocompare import ( - get_coin_price, - get_hour_id_from_ts, - get_day_id_from_ts, - get_ts_from_hour_id, - get_ts_from_day_id -) class BtcService(object): - def __init__(self, bitcoin_rpc, chain=Chain.BITCOIN, coin_price_type=CoinPriceType.empty): + def __init__(self, bitcoin_rpc, chain=Chain.BITCOIN): self.bitcoin_rpc = bitcoin_rpc self.block_mapper = BtcBlockMapper() self.transaction_mapper = BtcTransactionMapper() self.chain = chain - self.coin_price_type = coin_price_type self.cached_prices = {} def get_block(self, block_number, with_transactions=False): @@ -82,8 +74,6 @@ def get_blocks_by_hashes(self, block_hash_batch, with_transactions=True): if self.chain in Chain.HAVE_OLD_API and with_transactions: self._fetch_transactions(blocks) - self._add_coin_price_to_blocks(blocks, self.coin_price_type) - for block in blocks: self._remove_coinbase_input(block) From 831a083e712227730c50d8a84b33232534c22627 Mon Sep 17 00:00:00 2001 From: saurabhdaga-merkle Date: Sun, 26 Jun 2022 18:47:24 +0530 Subject: [PATCH 33/51] merge branch streaming --- bitcoinetl/jobs/export_blocks_job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitcoinetl/jobs/export_blocks_job.py b/bitcoinetl/jobs/export_blocks_job.py index aab6bf6..bbb7d98 100644 --- a/bitcoinetl/jobs/export_blocks_job.py +++ b/bitcoinetl/jobs/export_blocks_job.py @@ -56,7 +56,7 @@ def __init__( if not self.export_blocks and not self.export_transactions: raise ValueError('At least one of export_blocks or export_transactions must be True') - self.btc_service = BtcService(bitcoin_rpc, chain, coin_price_type) + self.btc_service = BtcService(bitcoin_rpc, chain) self.block_mapper = BtcBlockMapper() self.transaction_mapper = BtcTransactionMapper() From e2fd9937034588b9ba87737b18f45855f5bec60c Mon Sep 17 00:00:00 2001 From: Nirmal AK <=> Date: Tue, 30 Aug 2022 22:36:54 +0530 Subject: [PATCH 34/51] Adds cloudbuild config --- Dockerfile | 6 ++++++ Dockerfile_with_streaming | 7 +++++++ cloudbbuild.yaml | 12 ++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 cloudbbuild.yaml diff --git a/Dockerfile b/Dockerfile index c1e5ae9..6aa0567 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,17 @@ FROM python:3.6-alpine + MAINTAINER Omidiora Samuel + ENV PROJECT_DIR=bitcoin-etl RUN mkdir /$PROJECT_DIR + WORKDIR /$PROJECT_DIR + COPY . . + RUN apk add --no-cache gcc musl-dev #for C libraries: + RUN pip install --upgrade pip && pip install -e /$PROJECT_DIR/ ENTRYPOINT ["python", "bitcoinetl"] \ No newline at end of file diff --git a/Dockerfile_with_streaming b/Dockerfile_with_streaming index 6390558..07ff44c 100644 --- a/Dockerfile_with_streaming +++ b/Dockerfile_with_streaming @@ -1,15 +1,22 @@ FROM python:3.6 + MAINTAINER Evgeny Medvedev + ENV PROJECT_DIR=bitcoin-etl RUN mkdir /$PROJECT_DIR + WORKDIR /$PROJECT_DIR + COPY . . + RUN pip install --upgrade pip && pip install -e /$PROJECT_DIR/[streaming] # Add Tini ENV TINI_VERSION v0.18.0 + ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini + RUN chmod +x /tini ENTRYPOINT ["/tini", "--", "python", "bitcoinetl"] diff --git a/cloudbbuild.yaml b/cloudbbuild.yaml new file mode 100644 index 0000000..4e864f8 --- /dev/null +++ b/cloudbbuild.yaml @@ -0,0 +1,12 @@ +steps: + + - id: build-image + name: 'gcr.io/cloud-builders/docker' + args: ['build', '.', '-t', 'gcr.io/$PROJECT_ID/bitcoin-etl:${TAG_NAME}'] + + - id: push-image + name: 'gcr.io/cloud-builders/docker' + args: ['push', 'gcr.io/$PROJECT_ID/bitcoin-etl:${TAG_NAME}'] + waitFor: ['build-image'] + +timeout: 600s \ No newline at end of file From 37fa609c4f498c0f8a3f72a40f37ceddcc3496f0 Mon Sep 17 00:00:00 2001 From: Nirmal AK <=> Date: Tue, 30 Aug 2022 22:48:35 +0530 Subject: [PATCH 35/51] Adds correct dockerfile --- cloudbbuild.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudbbuild.yaml b/cloudbbuild.yaml index 4e864f8..21c3e50 100644 --- a/cloudbbuild.yaml +++ b/cloudbbuild.yaml @@ -2,7 +2,7 @@ steps: - id: build-image name: 'gcr.io/cloud-builders/docker' - args: ['build', '.', '-t', 'gcr.io/$PROJECT_ID/bitcoin-etl:${TAG_NAME}'] + args: ['build', ,'-f', 'Dockerfile_with_streaming', '.', '-t', 'gcr.io/$PROJECT_ID/bitcoin-etl:${TAG_NAME}'] - id: push-image name: 'gcr.io/cloud-builders/docker' From 3dbbf7463648f6eb5e2096e8bee64d1d6aad8bee Mon Sep 17 00:00:00 2001 From: Nirmal AK <=> Date: Tue, 30 Aug 2022 22:54:47 +0530 Subject: [PATCH 36/51] Fix filename --- cloudbbuild.yaml => cloudbuild.yaml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename cloudbbuild.yaml => cloudbuild.yaml (100%) diff --git a/cloudbbuild.yaml b/cloudbuild.yaml similarity index 100% rename from cloudbbuild.yaml rename to cloudbuild.yaml From 1cc23127748e4ec79acea2975215aaeeeed68c1f Mon Sep 17 00:00:00 2001 From: Nirmal AK <=> Date: Tue, 30 Aug 2022 23:13:45 +0530 Subject: [PATCH 37/51] Fix typo --- cloudbuild.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cloudbuild.yaml b/cloudbuild.yaml index 21c3e50..fc1605e 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -2,11 +2,11 @@ steps: - id: build-image name: 'gcr.io/cloud-builders/docker' - args: ['build', ,'-f', 'Dockerfile_with_streaming', '.', '-t', 'gcr.io/$PROJECT_ID/bitcoin-etl:${TAG_NAME}'] + args: ['build', '-f', 'Dockerfile_with_streaming', '.', '-t', 'gcr.io/$PROJECT_ID/bitcoin-etl:${_TAG_NAME}'] - id: push-image name: 'gcr.io/cloud-builders/docker' - args: ['push', 'gcr.io/$PROJECT_ID/bitcoin-etl:${TAG_NAME}'] + args: ['push', 'gcr.io/$PROJECT_ID/bitcoin-etl:${_TAG_NAME}'] waitFor: ['build-image'] timeout: 600s \ No newline at end of file From 42dd40d0255d243cc3bb9d5229b5b4087a638c37 Mon Sep 17 00:00:00 2001 From: Naveen Modi Date: Thu, 17 Aug 2023 10:39:02 +0530 Subject: [PATCH 38/51] added kafka exporter --- bitcoinetl/streaming/streaming_utils.py | 47 ++++++++++++---- .../jobs/exporters/kafka_exporter.py | 54 +++++++++++++++++++ last_synced_block.txt | 1 + setup.py | 3 +- 4 files changed, 94 insertions(+), 11 deletions(-) create mode 100644 blockchainetl/jobs/exporters/kafka_exporter.py create mode 100644 last_synced_block.txt diff --git a/bitcoinetl/streaming/streaming_utils.py b/bitcoinetl/streaming/streaming_utils.py index ea6777f..d44cbb4 100644 --- a/bitcoinetl/streaming/streaming_utils.py +++ b/bitcoinetl/streaming/streaming_utils.py @@ -2,15 +2,42 @@ def get_item_exporter(output): - if output is not None: - from blockchainetl.jobs.exporters.google_pubsub_item_exporter import GooglePubSubItemExporter - item_exporter = GooglePubSubItemExporter( - item_type_to_topic_mapping={ - 'block': output + '.blocks', - 'transaction': output + '.transactions' - }, - message_attributes=('item_id',)) - else: - item_exporter = ConsoleItemExporter() + item_exporter_type = determine_item_exporter_type(output) + if item_exporter_type == ItemExporterType.PUBSUB: + if output is not None: + from blockchainetl.jobs.exporters.google_pubsub_item_exporter import GooglePubSubItemExporter + item_exporter = GooglePubSubItemExporter( + item_type_to_topic_mapping={ + 'block': output + '.blocks', + 'transaction': output + '.transactions' + }, + message_attributes=('item_id',)) + else: + item_exporter = ConsoleItemExporter() + + elif item_exporter_type == ItemExporterType.KAFKA: + from blockchainetl.jobs.exporters.kafka_exporter import KafkaItemExporter + item_exporter = KafkaItemExporter(output, item_type_to_topic_mapping={ + 'block': 'blocks', + 'transaction': 'transactions', + }) + else: + raise ValueError('Unable to determine item exporter type for output ' + output) + return item_exporter + + +def determine_item_exporter_type(output): + if output is not None and output.startswith('projects'): + return ItemExporterType.PUBSUB + if output is not None and output.startswith('kafka'): + return ItemExporterType.KAFKA + else: + return ItemExporterType.UNKNOWN + + +class ItemExporterType: + PUBSUB = 'pubsub' + KAFKA = 'kafka' + UNKNOWN = 'unknown' \ No newline at end of file diff --git a/blockchainetl/jobs/exporters/kafka_exporter.py b/blockchainetl/jobs/exporters/kafka_exporter.py new file mode 100644 index 0000000..5e2f144 --- /dev/null +++ b/blockchainetl/jobs/exporters/kafka_exporter.py @@ -0,0 +1,54 @@ +import collections +import json +import logging + +from kafka import KafkaProducer + +from blockchainetl.jobs.exporters.converters.composite_item_converter import CompositeItemConverter + + +class KafkaItemExporter: + + def __init__(self, output, item_type_to_topic_mapping, converters=()): + self.item_type_to_topic_mapping = item_type_to_topic_mapping + self.converter = CompositeItemConverter(converters) + self.connection_url = self.get_connection_url(output) + print(self.connection_url) + self.producer = KafkaProducer(bootstrap_servers=self.connection_url) + + def get_connection_url(self, output): + try: + return output.split('/')[1] + except KeyError: + raise Exception('Invalid kafka output param, It should be in format of "kafka/127.0.0.1:9092"') + + def open(self): + pass + + def export_items(self, items): + for item in items: + self.export_item(item) + + def export_item(self, item): + item_type = item.get('type') + if item_type is not None and item_type in self.item_type_to_topic_mapping: + data = json.dumps(item).encode('utf-8') + logging.debug(data) + return self.producer.send(self.item_type_to_topic_mapping[item_type], value=data) + else: + logging.warning('Topic for item type "{}" is not configured.'.format(item_type)) + + def convert_items(self, items): + for item in items: + yield self.converter.convert_item(item) + + def close(self): + pass + + +def group_by_item_type(items): + result = collections.defaultdict(list) + for item in items: + result[item.get('type')].append(item) + + return result \ No newline at end of file diff --git a/last_synced_block.txt b/last_synced_block.txt new file mode 100644 index 0000000..29c95b3 --- /dev/null +++ b/last_synced_block.txt @@ -0,0 +1 @@ +500009 diff --git a/setup.py b/setup.py index a00868c..ab0a20d 100644 --- a/setup.py +++ b/setup.py @@ -38,7 +38,8 @@ def read(fname): extras_require={ 'streaming': [ 'timeout-decorator==0.4.1', - 'google-cloud-pubsub==0.39.1' + 'google-cloud-pubsub==0.39.1', + 'kafka-python==2.0.2' ], 'dev': [ 'pytest~=4.3.0' From 5255388999924eb82d520ca8261d81c22939bc8f Mon Sep 17 00:00:00 2001 From: Naveen Modi Date: Fri, 18 Aug 2023 00:58:37 +0530 Subject: [PATCH 39/51] added test file --- test.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 test.py diff --git a/test.py b/test.py new file mode 100644 index 0000000..f8a4fbd --- /dev/null +++ b/test.py @@ -0,0 +1,41 @@ +import json + +# Load the JSON payload into a Python dictionary +json_payload = '{"type": "transaction", "hash": "002c3a32568b5eee18ab78a0dadedb44dc2b9a7fa1bb56694e4001c725e7c5d4", "size": 901, "virtual_size": 499, "version": 1, "lock_time": 0, "block_number": 500003, "block_hash": "0000000000000000005467c7a728a3dcb17080d5fdca330043d51e298374f30e", "block_timestamp": 1513622788, "is_coinbase": false, "index": 2504, "inputs": [{"index": 0, "spent_transaction_hash": "3b2cd2d4da475b160d2a42c2a33274f3f96eb739ffc75fbcdf261dd7bfd0d3c8", "spent_output_index": 2, "script_asm": "0014de60e37722b066ad0fa68760b33e28a5507f8c43", "script_hex": "160014de60e37722b066ad0fa68760b33e28a5507f8c43", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3PYLmhgWqvxnYpYRAcadSrZVvJHbhA1Hei"], "value": 5220980}, {"index": 1, "spent_transaction_hash": "679cd2264208cf02c0f2b23bb5a3e5e70b625eb4299819d5f2f4b6d8cea1bac0", "spent_output_index": 0, "script_asm": "001483979987981972d175e99679ae0f924aa8a2ad38", "script_hex": "16001483979987981972d175e99679ae0f924aa8a2ad38", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3Jz332JDsVBM9uW8fY3efER2jbeAhrP6Bd"], "value": 22346203}, {"index": 2, "spent_transaction_hash": "686037162606a0a7b1127fc3ea696d4a358e4acd415329d56edd24e6cd2fb90d", "spent_output_index": 5, "script_asm": "0014fd4a76457bb54a6255c1e2a2ab1966fbc8c9f67f", "script_hex": "160014fd4a76457bb54a6255c1e2a2ab1966fbc8c9f67f", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3MSVex9p5XcNsvCDG6UZUo4YPYEexiPJyk"], "value": 41021800}, {"index": 3, "spent_transaction_hash": "836e945e6253e8f40813fffd46ed7dacf4b822bcb0d24333dc8ac25d1b74fcb5", "spent_output_index": 12, "script_asm": "00148dff9f50777b7cd47672ef4c3b309bd3fff7cd17", "script_hex": "1600148dff9f50777b7cd47672ef4c3b309bd3fff7cd17", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3LrGjkpzgzFa52zywa8u6MWCUBxkbiXzbB"], "value": 3350323}, {"index": 4, "spent_transaction_hash": "aed6524e6ce36875024c5a19f5fe02d7e8550b711b13e0ff02ee3f720593631d", "spent_output_index": 12, "script_asm": "0014ddc65ccc7e16c10977d1270924e6cdee65ad6e1f", "script_hex": "160014ddc65ccc7e16c10977d1270924e6cdee65ad6e1f", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["378UtwycNTwQzQWShjGHXgyyamqEGaXoiP"], "value": 2203341}], "outputs": [{"index": 0, "script_asm": "OP_DUP OP_HASH160 929416cab9f4dee6cd63a0a4844a39aeabaabc01 OP_EQUALVERIFY OP_CHECKSIG", "script_hex": "76a914929416cab9f4dee6cd63a0a4844a39aeabaabc0188ac", "required_signatures": null, "type": "pubkeyhash", "addresses": ["1EN34Qyz1j6hbCjKHoy5eDp9hwwLAz9EJ9"], "value": 74000000}], "input_count": 5, "output_count": 1, "input_value": 74142647, "output_value": 74000000, "fee": 142647, "item_id": "transaction_002c3a32568b5eee18ab78a0dadedb44dc2b9a7fa1bb56694e4001c725e7c5d4"}' +payload_dict = json.loads(json_payload) + +# Define the necessary variables +TYPE_EXTERNAL = "external" +default_token_address = {"chain": "bitcoin", "address": "0x1234567890abcdef"} +supported_currencies = {"bitcoin": "BTC"} + +# Perform the necessary transformations +transformed_transactions = [] +for input in payload_dict["inputs"]: + for output in payload_dict["outputs"]: + if not payload_dict["is_coinbase"]: + if payload_dict["block_timestamp"] >= 946684800: + if output["value"] > 0: + token_outgoing_value = (1e-8 * input["value"]) * (1e-8 * output["value"]) / (1e-8 * output["value"]) + else: + token_outgoing_value = (1e-8 * input["value"]) / payload_dict["output_count"] + if input["value"] > 0: + token_incoming_value = (1e-8 * input["value"]) * (1e-8 * output["value"]) / (1e-8 * input["value"]) + else: + token_incoming_value = 0 + transformed_transactions.append({ + "block": payload_dict["block_number"], + "transaction_id": payload_dict["hash"], + "transaction_ts": payload_dict["block_timestamp"], + "transaction_type": TYPE_EXTERNAL, + "sender_address": "|".join(input["addresses"]), + "receiver_address": "|".join(output["addresses"]), + "token_outgoing_value": token_outgoing_value, + "token_incoming_value": token_incoming_value, + "token_address": default_token_address["address"], + "token_symbol": supported_currencies[default_token_address["chain"]] + }) + +# Print the transformed transactions +for transaction in transformed_transactions: + print(transaction) \ No newline at end of file From 0dbabec19aa07811811a8d979de24adb0fed5a03 Mon Sep 17 00:00:00 2001 From: Naveen Modi Date: Fri, 18 Aug 2023 10:32:17 +0530 Subject: [PATCH 40/51] test file created for bitcoin flatten logic --- test.py => bitcoin_flatten_test.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) rename test.py => bitcoin_flatten_test.py (87%) diff --git a/test.py b/bitcoin_flatten_test.py similarity index 87% rename from test.py rename to bitcoin_flatten_test.py index f8a4fbd..f76dc78 100644 --- a/test.py +++ b/bitcoin_flatten_test.py @@ -1,20 +1,18 @@ import json -# Load the JSON payload into a Python dictionary + json_payload = '{"type": "transaction", "hash": "002c3a32568b5eee18ab78a0dadedb44dc2b9a7fa1bb56694e4001c725e7c5d4", "size": 901, "virtual_size": 499, "version": 1, "lock_time": 0, "block_number": 500003, "block_hash": "0000000000000000005467c7a728a3dcb17080d5fdca330043d51e298374f30e", "block_timestamp": 1513622788, "is_coinbase": false, "index": 2504, "inputs": [{"index": 0, "spent_transaction_hash": "3b2cd2d4da475b160d2a42c2a33274f3f96eb739ffc75fbcdf261dd7bfd0d3c8", "spent_output_index": 2, "script_asm": "0014de60e37722b066ad0fa68760b33e28a5507f8c43", "script_hex": "160014de60e37722b066ad0fa68760b33e28a5507f8c43", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3PYLmhgWqvxnYpYRAcadSrZVvJHbhA1Hei"], "value": 5220980}, {"index": 1, "spent_transaction_hash": "679cd2264208cf02c0f2b23bb5a3e5e70b625eb4299819d5f2f4b6d8cea1bac0", "spent_output_index": 0, "script_asm": "001483979987981972d175e99679ae0f924aa8a2ad38", "script_hex": "16001483979987981972d175e99679ae0f924aa8a2ad38", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3Jz332JDsVBM9uW8fY3efER2jbeAhrP6Bd"], "value": 22346203}, {"index": 2, "spent_transaction_hash": "686037162606a0a7b1127fc3ea696d4a358e4acd415329d56edd24e6cd2fb90d", "spent_output_index": 5, "script_asm": "0014fd4a76457bb54a6255c1e2a2ab1966fbc8c9f67f", "script_hex": "160014fd4a76457bb54a6255c1e2a2ab1966fbc8c9f67f", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3MSVex9p5XcNsvCDG6UZUo4YPYEexiPJyk"], "value": 41021800}, {"index": 3, "spent_transaction_hash": "836e945e6253e8f40813fffd46ed7dacf4b822bcb0d24333dc8ac25d1b74fcb5", "spent_output_index": 12, "script_asm": "00148dff9f50777b7cd47672ef4c3b309bd3fff7cd17", "script_hex": "1600148dff9f50777b7cd47672ef4c3b309bd3fff7cd17", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3LrGjkpzgzFa52zywa8u6MWCUBxkbiXzbB"], "value": 3350323}, {"index": 4, "spent_transaction_hash": "aed6524e6ce36875024c5a19f5fe02d7e8550b711b13e0ff02ee3f720593631d", "spent_output_index": 12, "script_asm": "0014ddc65ccc7e16c10977d1270924e6cdee65ad6e1f", "script_hex": "160014ddc65ccc7e16c10977d1270924e6cdee65ad6e1f", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["378UtwycNTwQzQWShjGHXgyyamqEGaXoiP"], "value": 2203341}], "outputs": [{"index": 0, "script_asm": "OP_DUP OP_HASH160 929416cab9f4dee6cd63a0a4844a39aeabaabc01 OP_EQUALVERIFY OP_CHECKSIG", "script_hex": "76a914929416cab9f4dee6cd63a0a4844a39aeabaabc0188ac", "required_signatures": null, "type": "pubkeyhash", "addresses": ["1EN34Qyz1j6hbCjKHoy5eDp9hwwLAz9EJ9"], "value": 74000000}], "input_count": 5, "output_count": 1, "input_value": 74142647, "output_value": 74000000, "fee": 142647, "item_id": "transaction_002c3a32568b5eee18ab78a0dadedb44dc2b9a7fa1bb56694e4001c725e7c5d4"}' payload_dict = json.loads(json_payload) -# Define the necessary variables -TYPE_EXTERNAL = "external" -default_token_address = {"chain": "bitcoin", "address": "0x1234567890abcdef"} -supported_currencies = {"bitcoin": "BTC"} -# Perform the necessary transformations +TYPE_EXTERNAL = "1" +default_token_address = "0x0000" + + transformed_transactions = [] for input in payload_dict["inputs"]: for output in payload_dict["outputs"]: if not payload_dict["is_coinbase"]: - if payload_dict["block_timestamp"] >= 946684800: if output["value"] > 0: token_outgoing_value = (1e-8 * input["value"]) * (1e-8 * output["value"]) / (1e-8 * output["value"]) else: @@ -23,6 +21,7 @@ token_incoming_value = (1e-8 * input["value"]) * (1e-8 * output["value"]) / (1e-8 * input["value"]) else: token_incoming_value = 0 + transformed_transactions.append({ "block": payload_dict["block_number"], "transaction_id": payload_dict["hash"], @@ -31,11 +30,9 @@ "sender_address": "|".join(input["addresses"]), "receiver_address": "|".join(output["addresses"]), "token_outgoing_value": token_outgoing_value, - "token_incoming_value": token_incoming_value, - "token_address": default_token_address["address"], - "token_symbol": supported_currencies[default_token_address["chain"]] + "token_address": default_token_address }) -# Print the transformed transactions + for transaction in transformed_transactions: print(transaction) \ No newline at end of file From 1581d9526cea834dfa22988c3e0560c8c2202852 Mon Sep 17 00:00:00 2001 From: Naveen Modi Date: Fri, 18 Aug 2023 13:17:57 +0530 Subject: [PATCH 41/51] updated the code --- bitcoin_flatten_test.py | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/bitcoin_flatten_test.py b/bitcoin_flatten_test.py index f76dc78..4408acd 100644 --- a/bitcoin_flatten_test.py +++ b/bitcoin_flatten_test.py @@ -1,27 +1,34 @@ import json +import datetime -json_payload = '{"type": "transaction", "hash": "002c3a32568b5eee18ab78a0dadedb44dc2b9a7fa1bb56694e4001c725e7c5d4", "size": 901, "virtual_size": 499, "version": 1, "lock_time": 0, "block_number": 500003, "block_hash": "0000000000000000005467c7a728a3dcb17080d5fdca330043d51e298374f30e", "block_timestamp": 1513622788, "is_coinbase": false, "index": 2504, "inputs": [{"index": 0, "spent_transaction_hash": "3b2cd2d4da475b160d2a42c2a33274f3f96eb739ffc75fbcdf261dd7bfd0d3c8", "spent_output_index": 2, "script_asm": "0014de60e37722b066ad0fa68760b33e28a5507f8c43", "script_hex": "160014de60e37722b066ad0fa68760b33e28a5507f8c43", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3PYLmhgWqvxnYpYRAcadSrZVvJHbhA1Hei"], "value": 5220980}, {"index": 1, "spent_transaction_hash": "679cd2264208cf02c0f2b23bb5a3e5e70b625eb4299819d5f2f4b6d8cea1bac0", "spent_output_index": 0, "script_asm": "001483979987981972d175e99679ae0f924aa8a2ad38", "script_hex": "16001483979987981972d175e99679ae0f924aa8a2ad38", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3Jz332JDsVBM9uW8fY3efER2jbeAhrP6Bd"], "value": 22346203}, {"index": 2, "spent_transaction_hash": "686037162606a0a7b1127fc3ea696d4a358e4acd415329d56edd24e6cd2fb90d", "spent_output_index": 5, "script_asm": "0014fd4a76457bb54a6255c1e2a2ab1966fbc8c9f67f", "script_hex": "160014fd4a76457bb54a6255c1e2a2ab1966fbc8c9f67f", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3MSVex9p5XcNsvCDG6UZUo4YPYEexiPJyk"], "value": 41021800}, {"index": 3, "spent_transaction_hash": "836e945e6253e8f40813fffd46ed7dacf4b822bcb0d24333dc8ac25d1b74fcb5", "spent_output_index": 12, "script_asm": "00148dff9f50777b7cd47672ef4c3b309bd3fff7cd17", "script_hex": "1600148dff9f50777b7cd47672ef4c3b309bd3fff7cd17", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3LrGjkpzgzFa52zywa8u6MWCUBxkbiXzbB"], "value": 3350323}, {"index": 4, "spent_transaction_hash": "aed6524e6ce36875024c5a19f5fe02d7e8550b711b13e0ff02ee3f720593631d", "spent_output_index": 12, "script_asm": "0014ddc65ccc7e16c10977d1270924e6cdee65ad6e1f", "script_hex": "160014ddc65ccc7e16c10977d1270924e6cdee65ad6e1f", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["378UtwycNTwQzQWShjGHXgyyamqEGaXoiP"], "value": 2203341}], "outputs": [{"index": 0, "script_asm": "OP_DUP OP_HASH160 929416cab9f4dee6cd63a0a4844a39aeabaabc01 OP_EQUALVERIFY OP_CHECKSIG", "script_hex": "76a914929416cab9f4dee6cd63a0a4844a39aeabaabc0188ac", "required_signatures": null, "type": "pubkeyhash", "addresses": ["1EN34Qyz1j6hbCjKHoy5eDp9hwwLAz9EJ9"], "value": 74000000}], "input_count": 5, "output_count": 1, "input_value": 74142647, "output_value": 74000000, "fee": 142647, "item_id": "transaction_002c3a32568b5eee18ab78a0dadedb44dc2b9a7fa1bb56694e4001c725e7c5d4"}' +json_payload = '{"type": "transaction", "hash": "002c3a32568b5eee18ab78a0dadedb44dc2b9a7fa1bb56694e4001c725e7c5d4", "size": 901, "virtual_size": 499, "version": 1, "lock_time": 0, "block_number": 500003, "block_hash": "0000000000000000005467c7a728a3dcb17080d5fdca330043d51e298374f30e", "block_timestamp": 1513622788, "is_coinbase": true, "index": 2504, "inputs": [{"index": 0, "spent_transaction_hash": "3b2cd2d4da475b160d2a42c2a33274f3f96eb739ffc75fbcdf261dd7bfd0d3c8", "spent_output_index": 2, "script_asm": "0014de60e37722b066ad0fa68760b33e28a5507f8c43", "script_hex": "160014de60e37722b066ad0fa68760b33e28a5507f8c43", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3PYLmhgWqvxnYpYRAcadSrZVvJHbhA1Hei"], "value": 5220980}, {"index": 1, "spent_transaction_hash": "679cd2264208cf02c0f2b23bb5a3e5e70b625eb4299819d5f2f4b6d8cea1bac0", "spent_output_index": 0, "script_asm": "001483979987981972d175e99679ae0f924aa8a2ad38", "script_hex": "16001483979987981972d175e99679ae0f924aa8a2ad38", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3Jz332JDsVBM9uW8fY3efER2jbeAhrP6Bd"], "value": 22346203}, {"index": 2, "spent_transaction_hash": "686037162606a0a7b1127fc3ea696d4a358e4acd415329d56edd24e6cd2fb90d", "spent_output_index": 5, "script_asm": "0014fd4a76457bb54a6255c1e2a2ab1966fbc8c9f67f", "script_hex": "160014fd4a76457bb54a6255c1e2a2ab1966fbc8c9f67f", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3MSVex9p5XcNsvCDG6UZUo4YPYEexiPJyk"], "value": 41021800}, {"index": 3, "spent_transaction_hash": "836e945e6253e8f40813fffd46ed7dacf4b822bcb0d24333dc8ac25d1b74fcb5", "spent_output_index": 12, "script_asm": "00148dff9f50777b7cd47672ef4c3b309bd3fff7cd17", "script_hex": "1600148dff9f50777b7cd47672ef4c3b309bd3fff7cd17", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3LrGjkpzgzFa52zywa8u6MWCUBxkbiXzbB"], "value": 3350323}, {"index": 4, "spent_transaction_hash": "aed6524e6ce36875024c5a19f5fe02d7e8550b711b13e0ff02ee3f720593631d", "spent_output_index": 12, "script_asm": "0014ddc65ccc7e16c10977d1270924e6cdee65ad6e1f", "script_hex": "160014ddc65ccc7e16c10977d1270924e6cdee65ad6e1f", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["378UtwycNTwQzQWShjGHXgyyamqEGaXoiP"], "value": 2203341}], "outputs": [{"index": 0, "script_asm": "OP_DUP OP_HASH160 929416cab9f4dee6cd63a0a4844a39aeabaabc01 OP_EQUALVERIFY OP_CHECKSIG", "script_hex": "76a914929416cab9f4dee6cd63a0a4844a39aeabaabc0188ac", "required_signatures": null, "type": "pubkeyhash", "addresses": ["1EN34Qyz1j6hbCjKHoy5eDp9hwwLAz9EJ9"], "value": 74000000}], "input_count": 5, "output_count": 1, "input_value": 74142647, "output_value": 74000000, "fee": 142647, "item_id": "transaction_002c3a32568b5eee18ab78a0dadedb44dc2b9a7fa1bb56694e4001c725e7c5d4"}' payload_dict = json.loads(json_payload) -TYPE_EXTERNAL = "1" +TYPE_EXTERNAL = 1 default_token_address = "0x0000" +NULL_ADDRESS_MINT = "Mint" + +TYPE_BLOCK_REWARD = 3 + transformed_transactions = [] for input in payload_dict["inputs"]: for output in payload_dict["outputs"]: if not payload_dict["is_coinbase"]: if output["value"] > 0: - token_outgoing_value = (1e-8 * input["value"]) * (1e-8 * output["value"]) / (1e-8 * output["value"]) + token_outgoing_value = round((1e-8 * input["value"]) * (1e-8 * output["value"]) / (1e-8 * payload_dict["output_value"]),8) else: - token_outgoing_value = (1e-8 * input["value"]) / payload_dict["output_count"] + token_outgoing_value = round((1e-8 * input["value"]) / payload_dict["output_count"],8) if input["value"] > 0: - token_incoming_value = (1e-8 * input["value"]) * (1e-8 * output["value"]) / (1e-8 * input["value"]) + token_incoming_value = round((1e-8 * input["value"]) * (1e-8 * output["value"]) / (1e-8 * payload_dict["input_value"]),8) else: token_incoming_value = 0 + token_outgoing_fee = round(token_outgoing_value - token_incoming_value,9) + transformed_transactions.append({ "block": payload_dict["block_number"], "transaction_id": payload_dict["hash"], @@ -30,7 +37,23 @@ "sender_address": "|".join(input["addresses"]), "receiver_address": "|".join(output["addresses"]), "token_outgoing_value": token_outgoing_value, - "token_address": default_token_address + "token_address": default_token_address, + "token_incoming_value":token_incoming_value, + "token_outgoing_fee": token_outgoing_fee + }) + else: + + transformed_transactions.append({ + "block": payload_dict["block_number"], + "transaction_id": payload_dict["hash"], + "transaction_ts": payload_dict["block_timestamp"], + "transaction_type": TYPE_BLOCK_REWARD, + "sender_address": f"{NULL_ADDRESS_MINT}_{datetime.datetime.fromtimestamp(payload_dict['block_timestamp']).month}", + "receiver_address": "|".join(output["addresses"]), + "token_outgoing_value": 1e-8 * output["value"], + "token_incoming_value": 1e-8 * output["value"], + "token_address": default_token_address, + "token_outgoing_fee": 0 }) From 3b35718c06c0f27be60085580479720cd3449603 Mon Sep 17 00:00:00 2001 From: Naveen Modi Date: Fri, 18 Aug 2023 22:24:24 +0530 Subject: [PATCH 42/51] added bitcoin flatten transformation --- bitcoin_flatten_test.py | 61 ------------------ bitcoinetl/streaming/streaming_utils.py | 4 +- .../jobs/exporters/bitcoin_flatten.py | 62 +++++++++++++++++++ .../jobs/exporters/kafka_exporter.py | 27 +++++--- last_synced_block.txt | 2 +- 5 files changed, 82 insertions(+), 74 deletions(-) delete mode 100644 bitcoin_flatten_test.py create mode 100644 blockchainetl/jobs/exporters/bitcoin_flatten.py diff --git a/bitcoin_flatten_test.py b/bitcoin_flatten_test.py deleted file mode 100644 index 4408acd..0000000 --- a/bitcoin_flatten_test.py +++ /dev/null @@ -1,61 +0,0 @@ -import json -import datetime - - -json_payload = '{"type": "transaction", "hash": "002c3a32568b5eee18ab78a0dadedb44dc2b9a7fa1bb56694e4001c725e7c5d4", "size": 901, "virtual_size": 499, "version": 1, "lock_time": 0, "block_number": 500003, "block_hash": "0000000000000000005467c7a728a3dcb17080d5fdca330043d51e298374f30e", "block_timestamp": 1513622788, "is_coinbase": true, "index": 2504, "inputs": [{"index": 0, "spent_transaction_hash": "3b2cd2d4da475b160d2a42c2a33274f3f96eb739ffc75fbcdf261dd7bfd0d3c8", "spent_output_index": 2, "script_asm": "0014de60e37722b066ad0fa68760b33e28a5507f8c43", "script_hex": "160014de60e37722b066ad0fa68760b33e28a5507f8c43", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3PYLmhgWqvxnYpYRAcadSrZVvJHbhA1Hei"], "value": 5220980}, {"index": 1, "spent_transaction_hash": "679cd2264208cf02c0f2b23bb5a3e5e70b625eb4299819d5f2f4b6d8cea1bac0", "spent_output_index": 0, "script_asm": "001483979987981972d175e99679ae0f924aa8a2ad38", "script_hex": "16001483979987981972d175e99679ae0f924aa8a2ad38", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3Jz332JDsVBM9uW8fY3efER2jbeAhrP6Bd"], "value": 22346203}, {"index": 2, "spent_transaction_hash": "686037162606a0a7b1127fc3ea696d4a358e4acd415329d56edd24e6cd2fb90d", "spent_output_index": 5, "script_asm": "0014fd4a76457bb54a6255c1e2a2ab1966fbc8c9f67f", "script_hex": "160014fd4a76457bb54a6255c1e2a2ab1966fbc8c9f67f", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3MSVex9p5XcNsvCDG6UZUo4YPYEexiPJyk"], "value": 41021800}, {"index": 3, "spent_transaction_hash": "836e945e6253e8f40813fffd46ed7dacf4b822bcb0d24333dc8ac25d1b74fcb5", "spent_output_index": 12, "script_asm": "00148dff9f50777b7cd47672ef4c3b309bd3fff7cd17", "script_hex": "1600148dff9f50777b7cd47672ef4c3b309bd3fff7cd17", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["3LrGjkpzgzFa52zywa8u6MWCUBxkbiXzbB"], "value": 3350323}, {"index": 4, "spent_transaction_hash": "aed6524e6ce36875024c5a19f5fe02d7e8550b711b13e0ff02ee3f720593631d", "spent_output_index": 12, "script_asm": "0014ddc65ccc7e16c10977d1270924e6cdee65ad6e1f", "script_hex": "160014ddc65ccc7e16c10977d1270924e6cdee65ad6e1f", "sequence": 4294967295, "required_signatures": null, "type": "scripthash", "addresses": ["378UtwycNTwQzQWShjGHXgyyamqEGaXoiP"], "value": 2203341}], "outputs": [{"index": 0, "script_asm": "OP_DUP OP_HASH160 929416cab9f4dee6cd63a0a4844a39aeabaabc01 OP_EQUALVERIFY OP_CHECKSIG", "script_hex": "76a914929416cab9f4dee6cd63a0a4844a39aeabaabc0188ac", "required_signatures": null, "type": "pubkeyhash", "addresses": ["1EN34Qyz1j6hbCjKHoy5eDp9hwwLAz9EJ9"], "value": 74000000}], "input_count": 5, "output_count": 1, "input_value": 74142647, "output_value": 74000000, "fee": 142647, "item_id": "transaction_002c3a32568b5eee18ab78a0dadedb44dc2b9a7fa1bb56694e4001c725e7c5d4"}' -payload_dict = json.loads(json_payload) - - -TYPE_EXTERNAL = 1 -default_token_address = "0x0000" - -NULL_ADDRESS_MINT = "Mint" - -TYPE_BLOCK_REWARD = 3 - - -transformed_transactions = [] -for input in payload_dict["inputs"]: - for output in payload_dict["outputs"]: - if not payload_dict["is_coinbase"]: - if output["value"] > 0: - token_outgoing_value = round((1e-8 * input["value"]) * (1e-8 * output["value"]) / (1e-8 * payload_dict["output_value"]),8) - else: - token_outgoing_value = round((1e-8 * input["value"]) / payload_dict["output_count"],8) - if input["value"] > 0: - token_incoming_value = round((1e-8 * input["value"]) * (1e-8 * output["value"]) / (1e-8 * payload_dict["input_value"]),8) - else: - token_incoming_value = 0 - - token_outgoing_fee = round(token_outgoing_value - token_incoming_value,9) - - transformed_transactions.append({ - "block": payload_dict["block_number"], - "transaction_id": payload_dict["hash"], - "transaction_ts": payload_dict["block_timestamp"], - "transaction_type": TYPE_EXTERNAL, - "sender_address": "|".join(input["addresses"]), - "receiver_address": "|".join(output["addresses"]), - "token_outgoing_value": token_outgoing_value, - "token_address": default_token_address, - "token_incoming_value":token_incoming_value, - "token_outgoing_fee": token_outgoing_fee - }) - else: - - transformed_transactions.append({ - "block": payload_dict["block_number"], - "transaction_id": payload_dict["hash"], - "transaction_ts": payload_dict["block_timestamp"], - "transaction_type": TYPE_BLOCK_REWARD, - "sender_address": f"{NULL_ADDRESS_MINT}_{datetime.datetime.fromtimestamp(payload_dict['block_timestamp']).month}", - "receiver_address": "|".join(output["addresses"]), - "token_outgoing_value": 1e-8 * output["value"], - "token_incoming_value": 1e-8 * output["value"], - "token_address": default_token_address, - "token_outgoing_fee": 0 - }) - - -for transaction in transformed_transactions: - print(transaction) \ No newline at end of file diff --git a/bitcoinetl/streaming/streaming_utils.py b/bitcoinetl/streaming/streaming_utils.py index d44cbb4..a2f3338 100644 --- a/bitcoinetl/streaming/streaming_utils.py +++ b/bitcoinetl/streaming/streaming_utils.py @@ -19,8 +19,8 @@ def get_item_exporter(output): elif item_exporter_type == ItemExporterType.KAFKA: from blockchainetl.jobs.exporters.kafka_exporter import KafkaItemExporter item_exporter = KafkaItemExporter(output, item_type_to_topic_mapping={ - 'block': 'blocks', - 'transaction': 'transactions', + 'block': 'bitcoin.hot.blocks', + 'transaction': 'bitcoin.hot.transactions', }) else: raise ValueError('Unable to determine item exporter type for output ' + output) diff --git a/blockchainetl/jobs/exporters/bitcoin_flatten.py b/blockchainetl/jobs/exporters/bitcoin_flatten.py new file mode 100644 index 0000000..c23a86b --- /dev/null +++ b/blockchainetl/jobs/exporters/bitcoin_flatten.py @@ -0,0 +1,62 @@ +import json +import datetime +from decimal import Decimal + + +def flatten_transformation(payload_dict): + + TYPE_EXTERNAL = 1 + default_token_address = "0x0000" + NULL_ADDRESS_MINT = "Mint" + TYPE_BLOCK_REWARD = 3 + + transformed_transactions = [] + for input in payload_dict["inputs"]: + for output in payload_dict["outputs"]: + if not payload_dict["is_coinbase"]: + if output["value"] > 0: + token_outgoing_value = Decimal((1e-8 * input["value"]) * (1e-8 * output["value"]) / (1e-8 * payload_dict["output_value"])) + else: + token_outgoing_value = Decimal((1e-8 * input["value"]) / payload_dict["output_count"]) + if input["value"] > 0: + token_incoming_value = Decimal((1e-8 * input["value"]) * (1e-8 * output["value"]) / (1e-8 * payload_dict["input_value"])) + else: + token_incoming_value = 0 + + token_outgoing_fee = token_outgoing_value - token_incoming_value + token_outgoing_fee = f'{float(token_outgoing_fee):.16f}' + # token_outgoing_value = float(token_outgoing_value) + # token_incoming_value = float(token_incoming_value) + + + + transformed_transactions.append({ + "block": payload_dict["block_number"], + "transaction_id": payload_dict["hash"], + "transaction_ts": payload_dict["block_timestamp"], + "transaction_type": TYPE_EXTERNAL, + "sender_address": "|".join(input["addresses"]), + "receiver_address": "|".join(output["addresses"]), + "token_outgoing_value": float(token_outgoing_value), + "token_address": default_token_address, + "token_incoming_value": float(token_incoming_value), + "token_outgoing_fee": token_outgoing_fee + }) + else: + + transformed_transactions.append({ + "block": payload_dict["block_number"], + "transaction_id": payload_dict["hash"], + "transaction_ts": payload_dict["block_timestamp"], + "transaction_type": TYPE_BLOCK_REWARD, + "sender_address": f"{NULL_ADDRESS_MINT}_{datetime.datetime.fromtimestamp(payload_dict['block_timestamp']).month}", + "receiver_address": "|".join(output["addresses"]), + "token_outgoing_value": 1e-8 * output["value"], + "token_incoming_value": 1e-8 * output["value"], + "token_address": default_token_address, + "token_outgoing_fee": 0 + }) + + + return transformed_transactions + \ No newline at end of file diff --git a/blockchainetl/jobs/exporters/kafka_exporter.py b/blockchainetl/jobs/exporters/kafka_exporter.py index 5e2f144..2c8101e 100644 --- a/blockchainetl/jobs/exporters/kafka_exporter.py +++ b/blockchainetl/jobs/exporters/kafka_exporter.py @@ -5,6 +5,7 @@ from kafka import KafkaProducer from blockchainetl.jobs.exporters.converters.composite_item_converter import CompositeItemConverter +from blockchainetl.jobs.exporters.bitcoin_flatten import flatten_transformation class KafkaItemExporter: @@ -27,16 +28,22 @@ def open(self): def export_items(self, items): for item in items: - self.export_item(item) - - def export_item(self, item): - item_type = item.get('type') - if item_type is not None and item_type in self.item_type_to_topic_mapping: - data = json.dumps(item).encode('utf-8') - logging.debug(data) - return self.producer.send(self.item_type_to_topic_mapping[item_type], value=data) - else: - logging.warning('Topic for item type "{}" is not configured.'.format(item_type)) + item_type = item.get('type') + if item_type is not None and item_type in self.item_type_to_topic_mapping: + if(item_type == "transaction"): + transformed_data = flatten_transformation(item) + for data in transformed_data: + self.export_item(data,item_type) + else: + self.export_item(item,item_type) + else: + logging.warning('Topic for item type "{}" is not configured.'.format(item_type)) + + def export_item(self, item, item_type): + data = json.dumps(item).encode('utf-8') + logging.debug(data) + return self.producer.send(self.item_type_to_topic_mapping[item_type], value=data) + def convert_items(self, items): for item in items: diff --git a/last_synced_block.txt b/last_synced_block.txt index 29c95b3..412a553 100644 --- a/last_synced_block.txt +++ b/last_synced_block.txt @@ -1 +1 @@ -500009 +500014 From 2b33c5804fac0562072d2b7f39843b31c9d5c5cd Mon Sep 17 00:00:00 2001 From: Naveen Modi Date: Sat, 19 Aug 2023 20:45:59 +0530 Subject: [PATCH 43/51] confluent conf added --- Dockerfile | 2 +- Dockerfile_with_streaming | 2 +- bitcoinetl/cli/stream.py | 6 ++++-- bitcoinetl/streaming/streaming_utils.py | 4 ++-- .../jobs/exporters/bitcoin_flatten.py | 12 +++--------- blockchainetl/jobs/exporters/kafka_exporter.py | 18 +++++++++++++++--- 6 files changed, 26 insertions(+), 18 deletions(-) diff --git a/Dockerfile b/Dockerfile index c1e5ae9..5beda47 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,6 @@ RUN mkdir /$PROJECT_DIR WORKDIR /$PROJECT_DIR COPY . . RUN apk add --no-cache gcc musl-dev #for C libraries: -RUN pip install --upgrade pip && pip install -e /$PROJECT_DIR/ +RUN pip install --upgrade pip && pip install -e /$PROJECT_DIR/ && pip install dist/bitcoin-etl-1.5.2.tar.gz && pip install kafka-python==2.0.2 ENTRYPOINT ["python", "bitcoinetl"] \ No newline at end of file diff --git a/Dockerfile_with_streaming b/Dockerfile_with_streaming index 6390558..d593f6b 100644 --- a/Dockerfile_with_streaming +++ b/Dockerfile_with_streaming @@ -5,7 +5,7 @@ ENV PROJECT_DIR=bitcoin-etl RUN mkdir /$PROJECT_DIR WORKDIR /$PROJECT_DIR COPY . . -RUN pip install --upgrade pip && pip install -e /$PROJECT_DIR/[streaming] +RUN pip install --upgrade pip && pip install dist/bitcoin-etl-1.5.2.tar.gz && pip install -e /$PROJECT_DIR/[streaming] # Add Tini ENV TINI_VERSION v0.18.0 diff --git a/bitcoinetl/cli/stream.py b/bitcoinetl/cli/stream.py index 46170df..76c788a 100644 --- a/bitcoinetl/cli/stream.py +++ b/bitcoinetl/cli/stream.py @@ -41,6 +41,8 @@ @click.option('-o', '--output', type=str, help='Google PubSub topic path e.g. projects/your-project/topics/bitcoin_blockchain. ' 'If not specified will print to console.') +@click.option('-k', '--kafka_config', default=None, type=str, + help='pass the kafka config') @click.option('-s', '--start-block', default=None, type=int, help='Start block.') @click.option('-c', '--chain', default=Chain.BITCOIN, type=click.Choice(Chain.ALL), help='The type of chain.') @click.option('--period-seconds', default=10, type=int, help='How many seconds to sleep between syncs.') @@ -50,7 +52,7 @@ @click.option('--log-file', default=None, type=str, help='Log file.') @click.option('--pid-file', default=None, type=str, help='pid file.') @click.option('--enrich', default=True, type=bool, help='Enable filling in transactions inputs fields.') -def stream(last_synced_block_file, lag, provider_uri, output, start_block, chain=Chain.BITCOIN, +def stream(last_synced_block_file, lag, provider_uri, output, kafka_config, start_block, chain=Chain.BITCOIN, period_seconds=10, batch_size=2, block_batch_size=10, max_workers=5, log_file=None, pid_file=None, enrich=True): """Streams all data types to console or Google Pub/Sub.""" @@ -63,7 +65,7 @@ def stream(last_synced_block_file, lag, provider_uri, output, start_block, chain streamer_adapter = BtcStreamerAdapter( bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)), - item_exporter=get_item_exporter(output), + item_exporter=get_item_exporter(output,kafka_config), chain=chain, batch_size=batch_size, enable_enrich=enrich, diff --git a/bitcoinetl/streaming/streaming_utils.py b/bitcoinetl/streaming/streaming_utils.py index a2f3338..d747eff 100644 --- a/bitcoinetl/streaming/streaming_utils.py +++ b/bitcoinetl/streaming/streaming_utils.py @@ -19,8 +19,8 @@ def get_item_exporter(output): elif item_exporter_type == ItemExporterType.KAFKA: from blockchainetl.jobs.exporters.kafka_exporter import KafkaItemExporter item_exporter = KafkaItemExporter(output, item_type_to_topic_mapping={ - 'block': 'bitcoin.hot.blocks', - 'transaction': 'bitcoin.hot.transactions', + 'block': 'producer.bitcoin.hot.blocks', + 'transaction': 'producer.bitcoin.hot.transactions', }) else: raise ValueError('Unable to determine item exporter type for output ' + output) diff --git a/blockchainetl/jobs/exporters/bitcoin_flatten.py b/blockchainetl/jobs/exporters/bitcoin_flatten.py index c23a86b..36252c7 100644 --- a/blockchainetl/jobs/exporters/bitcoin_flatten.py +++ b/blockchainetl/jobs/exporters/bitcoin_flatten.py @@ -11,8 +11,8 @@ def flatten_transformation(payload_dict): TYPE_BLOCK_REWARD = 3 transformed_transactions = [] - for input in payload_dict["inputs"]: - for output in payload_dict["outputs"]: + for output in payload_dict["outputs"]: + for input in payload_dict["inputs"]: if not payload_dict["is_coinbase"]: if output["value"] > 0: token_outgoing_value = Decimal((1e-8 * input["value"]) * (1e-8 * output["value"]) / (1e-8 * payload_dict["output_value"])) @@ -24,11 +24,6 @@ def flatten_transformation(payload_dict): token_incoming_value = 0 token_outgoing_fee = token_outgoing_value - token_incoming_value - token_outgoing_fee = f'{float(token_outgoing_fee):.16f}' - # token_outgoing_value = float(token_outgoing_value) - # token_incoming_value = float(token_incoming_value) - - transformed_transactions.append({ "block": payload_dict["block_number"], @@ -40,10 +35,9 @@ def flatten_transformation(payload_dict): "token_outgoing_value": float(token_outgoing_value), "token_address": default_token_address, "token_incoming_value": float(token_incoming_value), - "token_outgoing_fee": token_outgoing_fee + "token_outgoing_fee": float(token_outgoing_fee) }) else: - transformed_transactions.append({ "block": payload_dict["block_number"], "transaction_id": payload_dict["hash"], diff --git a/blockchainetl/jobs/exporters/kafka_exporter.py b/blockchainetl/jobs/exporters/kafka_exporter.py index 2c8101e..6925131 100644 --- a/blockchainetl/jobs/exporters/kafka_exporter.py +++ b/blockchainetl/jobs/exporters/kafka_exporter.py @@ -1,8 +1,10 @@ import collections import json import logging +import os +import socket -from kafka import KafkaProducer +from confluent_kafka import Producer from blockchainetl.jobs.exporters.converters.composite_item_converter import CompositeItemConverter from blockchainetl.jobs.exporters.bitcoin_flatten import flatten_transformation @@ -15,7 +17,17 @@ def __init__(self, output, item_type_to_topic_mapping, converters=()): self.converter = CompositeItemConverter(converters) self.connection_url = self.get_connection_url(output) print(self.connection_url) - self.producer = KafkaProducer(bootstrap_servers=self.connection_url) + conf = { + "bootstrap.servers": os.getenv("CONFLUENT_ENDPOINT"), + "security.protocol": "SASL_SSL", + "sasl.mechanisms": "PLAIN", + "client.id": socket.gethostname(), + "message.max.bytes": 5242880, + "sasl.username": os.getenv("BLOCKCHAIN_PRODUCER_KEY"), + "sasl.password": os.getenv("BLOCKCHAIN_PRODUCER_SECRET") + } + + self.producer = Producer(conf) def get_connection_url(self, output): try: @@ -42,7 +54,7 @@ def export_items(self, items): def export_item(self, item, item_type): data = json.dumps(item).encode('utf-8') logging.debug(data) - return self.producer.send(self.item_type_to_topic_mapping[item_type], value=data) + return self.producer.produce(self.item_type_to_topic_mapping[item_type], value=data) def convert_items(self, items): From 426051835ce3e9993c660002b8a07af63f54ad33 Mon Sep 17 00:00:00 2001 From: Naveen Modi Date: Sat, 19 Aug 2023 20:47:55 +0530 Subject: [PATCH 44/51] new changes --- bitcoinetl/cli/stream.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/bitcoinetl/cli/stream.py b/bitcoinetl/cli/stream.py index 76c788a..46170df 100644 --- a/bitcoinetl/cli/stream.py +++ b/bitcoinetl/cli/stream.py @@ -41,8 +41,6 @@ @click.option('-o', '--output', type=str, help='Google PubSub topic path e.g. projects/your-project/topics/bitcoin_blockchain. ' 'If not specified will print to console.') -@click.option('-k', '--kafka_config', default=None, type=str, - help='pass the kafka config') @click.option('-s', '--start-block', default=None, type=int, help='Start block.') @click.option('-c', '--chain', default=Chain.BITCOIN, type=click.Choice(Chain.ALL), help='The type of chain.') @click.option('--period-seconds', default=10, type=int, help='How many seconds to sleep between syncs.') @@ -52,7 +50,7 @@ @click.option('--log-file', default=None, type=str, help='Log file.') @click.option('--pid-file', default=None, type=str, help='pid file.') @click.option('--enrich', default=True, type=bool, help='Enable filling in transactions inputs fields.') -def stream(last_synced_block_file, lag, provider_uri, output, kafka_config, start_block, chain=Chain.BITCOIN, +def stream(last_synced_block_file, lag, provider_uri, output, start_block, chain=Chain.BITCOIN, period_seconds=10, batch_size=2, block_batch_size=10, max_workers=5, log_file=None, pid_file=None, enrich=True): """Streams all data types to console or Google Pub/Sub.""" @@ -65,7 +63,7 @@ def stream(last_synced_block_file, lag, provider_uri, output, kafka_config, star streamer_adapter = BtcStreamerAdapter( bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)), - item_exporter=get_item_exporter(output,kafka_config), + item_exporter=get_item_exporter(output), chain=chain, batch_size=batch_size, enable_enrich=enrich, From 7b128dc0176693ea883b728e824a99c325fdd120 Mon Sep 17 00:00:00 2001 From: Naveen Modi Date: Mon, 21 Aug 2023 01:02:19 +0530 Subject: [PATCH 45/51] added key in kafka producer --- blockchainetl/jobs/exporters/kafka_exporter.py | 2 +- last_synced_block.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/blockchainetl/jobs/exporters/kafka_exporter.py b/blockchainetl/jobs/exporters/kafka_exporter.py index 6925131..ac15024 100644 --- a/blockchainetl/jobs/exporters/kafka_exporter.py +++ b/blockchainetl/jobs/exporters/kafka_exporter.py @@ -54,7 +54,7 @@ def export_items(self, items): def export_item(self, item, item_type): data = json.dumps(item).encode('utf-8') logging.debug(data) - return self.producer.produce(self.item_type_to_topic_mapping[item_type], value=data) + return self.producer.produce(self.item_type_to_topic_mapping[item_type],key="0x0000",value=data) def convert_items(self, items): diff --git a/last_synced_block.txt b/last_synced_block.txt index 412a553..c6bfcb7 100644 --- a/last_synced_block.txt +++ b/last_synced_block.txt @@ -1 +1 @@ -500014 +804002 From 8c37eca7add679e06a8fb19a20d06633d0059734 Mon Sep 17 00:00:00 2001 From: Naveen Modi Date: Mon, 21 Aug 2023 10:59:19 +0530 Subject: [PATCH 46/51] new changes --- blockchainetl/jobs/exporters/kafka_exporter.py | 11 +++++++++-- setup.py | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/blockchainetl/jobs/exporters/kafka_exporter.py b/blockchainetl/jobs/exporters/kafka_exporter.py index ac15024..703c431 100644 --- a/blockchainetl/jobs/exporters/kafka_exporter.py +++ b/blockchainetl/jobs/exporters/kafka_exporter.py @@ -38,6 +38,12 @@ def get_connection_url(self, output): def open(self): pass + def acked(err, msg): + if err is not None: + logging.error("Failed to deliver message: %s: %s" % (str(msg), str(err))) + else: + logging.debug("Message produced: %s" % (str(msg))) + def export_items(self, items): for item in items: item_type = item.get('type') @@ -54,8 +60,9 @@ def export_items(self, items): def export_item(self, item, item_type): data = json.dumps(item).encode('utf-8') logging.debug(data) - return self.producer.produce(self.item_type_to_topic_mapping[item_type],key="0x0000",value=data) - + return self.producer.produce(self.item_type_to_topic_mapping[item_type],key="0x0000",value=data, callback=self.acked) + + def convert_items(self, items): for item in items: diff --git a/setup.py b/setup.py index ab0a20d..4600d22 100644 --- a/setup.py +++ b/setup.py @@ -39,7 +39,7 @@ def read(fname): 'streaming': [ 'timeout-decorator==0.4.1', 'google-cloud-pubsub==0.39.1', - 'kafka-python==2.0.2' + 'confluent-kafka==2.2.0' ], 'dev': [ 'pytest~=4.3.0' From de692241fadc4ca530ade9c4ce7090b4c8987eaf Mon Sep 17 00:00:00 2001 From: Naveen Modi Date: Mon, 21 Aug 2023 11:55:28 +0530 Subject: [PATCH 47/51] new changes --- blockchainetl/jobs/exporters/bitcoin_flatten.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/blockchainetl/jobs/exporters/bitcoin_flatten.py b/blockchainetl/jobs/exporters/bitcoin_flatten.py index 36252c7..1fc9846 100644 --- a/blockchainetl/jobs/exporters/bitcoin_flatten.py +++ b/blockchainetl/jobs/exporters/bitcoin_flatten.py @@ -32,10 +32,10 @@ def flatten_transformation(payload_dict): "transaction_type": TYPE_EXTERNAL, "sender_address": "|".join(input["addresses"]), "receiver_address": "|".join(output["addresses"]), - "token_outgoing_value": float(token_outgoing_value), + "token_outgoing_value": str(token_outgoing_value), "token_address": default_token_address, - "token_incoming_value": float(token_incoming_value), - "token_outgoing_fee": float(token_outgoing_fee) + "token_incoming_value": str(token_incoming_value), + "token_outgoing_fee": str(token_outgoing_fee) }) else: transformed_transactions.append({ @@ -45,10 +45,10 @@ def flatten_transformation(payload_dict): "transaction_type": TYPE_BLOCK_REWARD, "sender_address": f"{NULL_ADDRESS_MINT}_{datetime.datetime.fromtimestamp(payload_dict['block_timestamp']).month}", "receiver_address": "|".join(output["addresses"]), - "token_outgoing_value": 1e-8 * output["value"], - "token_incoming_value": 1e-8 * output["value"], + "token_outgoing_value": str(1e-8 * output["value"]), + "token_incoming_value": str(1e-8 * output["value"]), "token_address": default_token_address, - "token_outgoing_fee": 0 + "token_outgoing_fee": str(0) }) From f92e88a4de67437e70d097ef3b3b8d8c251157db Mon Sep 17 00:00:00 2001 From: Naveen Modi Date: Mon, 21 Aug 2023 12:11:23 +0530 Subject: [PATCH 48/51] new changes --- blockchainetl/jobs/exporters/kafka_exporter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/blockchainetl/jobs/exporters/kafka_exporter.py b/blockchainetl/jobs/exporters/kafka_exporter.py index 703c431..f421ffb 100644 --- a/blockchainetl/jobs/exporters/kafka_exporter.py +++ b/blockchainetl/jobs/exporters/kafka_exporter.py @@ -18,13 +18,13 @@ def __init__(self, output, item_type_to_topic_mapping, converters=()): self.connection_url = self.get_connection_url(output) print(self.connection_url) conf = { - "bootstrap.servers": os.getenv("CONFLUENT_ENDPOINT"), + "bootstrap.servers": os.getenv("CONFLUENT_BROKER"), "security.protocol": "SASL_SSL", "sasl.mechanisms": "PLAIN", "client.id": socket.gethostname(), "message.max.bytes": 5242880, - "sasl.username": os.getenv("BLOCKCHAIN_PRODUCER_KEY"), - "sasl.password": os.getenv("BLOCKCHAIN_PRODUCER_SECRET") + "sasl.username": os.getenv("CONFLUENT_USERNAME"), + "sasl.password": os.getenv("CONFLUENT_PASSWORD") } self.producer = Producer(conf) From 4c521e5d205cd3870cf926fbce8ff5b56325d7c9 Mon Sep 17 00:00:00 2001 From: Naveen Modi Date: Wed, 23 Aug 2023 00:30:18 +0530 Subject: [PATCH 49/51] added topic mapping params in cli --- bitcoinetl/cli/stream.py | 5 +++-- bitcoinetl/streaming/streaming_utils.py | 13 ++++++++----- blockchainetl/jobs/exporters/bitcoin_flatten.py | 6 +++--- blockchainetl/jobs/exporters/kafka_exporter.py | 16 +++++++++++++--- 4 files changed, 27 insertions(+), 13 deletions(-) diff --git a/bitcoinetl/cli/stream.py b/bitcoinetl/cli/stream.py index 46170df..0ef01c4 100644 --- a/bitcoinetl/cli/stream.py +++ b/bitcoinetl/cli/stream.py @@ -41,6 +41,7 @@ @click.option('-o', '--output', type=str, help='Google PubSub topic path e.g. projects/your-project/topics/bitcoin_blockchain. ' 'If not specified will print to console.') +@click.option('--topic-mapping', default=None, type=dict[str,str], help="Topic Mapping should be Python dict like {'block': 'producer.bitcoin.hot.blocks','transaction': 'producer.bitcoin.hot.transactions',}") @click.option('-s', '--start-block', default=None, type=int, help='Start block.') @click.option('-c', '--chain', default=Chain.BITCOIN, type=click.Choice(Chain.ALL), help='The type of chain.') @click.option('--period-seconds', default=10, type=int, help='How many seconds to sleep between syncs.') @@ -50,7 +51,7 @@ @click.option('--log-file', default=None, type=str, help='Log file.') @click.option('--pid-file', default=None, type=str, help='pid file.') @click.option('--enrich', default=True, type=bool, help='Enable filling in transactions inputs fields.') -def stream(last_synced_block_file, lag, provider_uri, output, start_block, chain=Chain.BITCOIN, +def stream(last_synced_block_file, lag, provider_uri, output, topic_mapping, start_block, chain=Chain.BITCOIN, period_seconds=10, batch_size=2, block_batch_size=10, max_workers=5, log_file=None, pid_file=None, enrich=True): """Streams all data types to console or Google Pub/Sub.""" @@ -63,7 +64,7 @@ def stream(last_synced_block_file, lag, provider_uri, output, start_block, chain streamer_adapter = BtcStreamerAdapter( bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)), - item_exporter=get_item_exporter(output), + item_exporter=get_item_exporter(output,topic_mapping), chain=chain, batch_size=batch_size, enable_enrich=enrich, diff --git a/bitcoinetl/streaming/streaming_utils.py b/bitcoinetl/streaming/streaming_utils.py index d747eff..0ba7e45 100644 --- a/bitcoinetl/streaming/streaming_utils.py +++ b/bitcoinetl/streaming/streaming_utils.py @@ -1,7 +1,7 @@ from blockchainetl.jobs.exporters.console_item_exporter import ConsoleItemExporter -def get_item_exporter(output): +def get_item_exporter(output,topic_mapping): item_exporter_type = determine_item_exporter_type(output) if item_exporter_type == ItemExporterType.PUBSUB: @@ -18,10 +18,13 @@ def get_item_exporter(output): elif item_exporter_type == ItemExporterType.KAFKA: from blockchainetl.jobs.exporters.kafka_exporter import KafkaItemExporter - item_exporter = KafkaItemExporter(output, item_type_to_topic_mapping={ - 'block': 'producer.bitcoin.hot.blocks', - 'transaction': 'producer.bitcoin.hot.transactions', - }) + if (topic_mapping is None): + item_exporter = KafkaItemExporter(output, item_type_to_topic_mapping={ + 'block': 'producer.bitcoin.hot.blocks', + 'transaction': 'producer.bitcoin.hot.transactions', + }) + else: + item_exporter = KafkaItemExporter(output, item_type_to_topic_mapping =topic_mapping) else: raise ValueError('Unable to determine item exporter type for output ' + output) diff --git a/blockchainetl/jobs/exporters/bitcoin_flatten.py b/blockchainetl/jobs/exporters/bitcoin_flatten.py index 1fc9846..389331a 100644 --- a/blockchainetl/jobs/exporters/bitcoin_flatten.py +++ b/blockchainetl/jobs/exporters/bitcoin_flatten.py @@ -32,10 +32,10 @@ def flatten_transformation(payload_dict): "transaction_type": TYPE_EXTERNAL, "sender_address": "|".join(input["addresses"]), "receiver_address": "|".join(output["addresses"]), - "token_outgoing_value": str(token_outgoing_value), + "token_outgoing_value": str(float(token_outgoing_value)), "token_address": default_token_address, - "token_incoming_value": str(token_incoming_value), - "token_outgoing_fee": str(token_outgoing_fee) + "token_incoming_value": str(float(token_incoming_value)), + "token_outgoing_fee": str(float(token_outgoing_fee)) }) else: transformed_transactions.append({ diff --git a/blockchainetl/jobs/exporters/kafka_exporter.py b/blockchainetl/jobs/exporters/kafka_exporter.py index f421ffb..22a77f9 100644 --- a/blockchainetl/jobs/exporters/kafka_exporter.py +++ b/blockchainetl/jobs/exporters/kafka_exporter.py @@ -16,7 +16,7 @@ def __init__(self, output, item_type_to_topic_mapping, converters=()): self.item_type_to_topic_mapping = item_type_to_topic_mapping self.converter = CompositeItemConverter(converters) self.connection_url = self.get_connection_url(output) - print(self.connection_url) + # print(self.connection_url) conf = { "bootstrap.servers": os.getenv("CONFLUENT_BROKER"), "security.protocol": "SASL_SSL", @@ -59,9 +59,19 @@ def export_items(self, items): def export_item(self, item, item_type): data = json.dumps(item).encode('utf-8') - logging.debug(data) - return self.producer.produce(self.item_type_to_topic_mapping[item_type],key="0x0000",value=data, callback=self.acked) + message_future = self.write_to_kafka(value=data, + topic=self.item_type_to_topic_mapping[item_type]) + + return message_future + + def write_to_kafka(self, value: str, topic: str): + try: + self.producer.produce(topic,key="0x0000",value=value, callback=self.acked) + except BufferError: + self.logging.error('%% Local producer queue is full (%d messages awaiting delivery): try again\n' % + len(self.producer)) + self.producer.poll(0) def convert_items(self, items): From f7b9cba04ed6310f656a96433a0f007011428a09 Mon Sep 17 00:00:00 2001 From: Naveen Modi Date: Tue, 29 Aug 2023 18:01:57 +0530 Subject: [PATCH 50/51] new changes --- bitcoinetl/cli/stream.py | 9 +++++--- bitcoinetl/streaming/streaming_utils.py | 8 +++---- .../jobs/exporters/kafka_exporter.py | 23 +++++++++---------- litecoin_last_synced_block.txt | 1 + 4 files changed, 22 insertions(+), 19 deletions(-) create mode 100644 litecoin_last_synced_block.txt diff --git a/bitcoinetl/cli/stream.py b/bitcoinetl/cli/stream.py index 0ef01c4..030dc10 100644 --- a/bitcoinetl/cli/stream.py +++ b/bitcoinetl/cli/stream.py @@ -21,7 +21,7 @@ # SOFTWARE. import click - +import json from bitcoinetl.enumeration.chain import Chain from bitcoinetl.rpc.bitcoin_rpc import BitcoinRpc @@ -41,7 +41,7 @@ @click.option('-o', '--output', type=str, help='Google PubSub topic path e.g. projects/your-project/topics/bitcoin_blockchain. ' 'If not specified will print to console.') -@click.option('--topic-mapping', default=None, type=dict[str,str], help="Topic Mapping should be Python dict like {'block': 'producer.bitcoin.hot.blocks','transaction': 'producer.bitcoin.hot.transactions',}") +@click.option('--topic-mapping', default=None, type=str, help="Topic Mapping should be json like {\"block\": \"producer-litcoin-blocks-hot\",\"transaction\": \"producer-litcoin-transactions-hot\"}") @click.option('-s', '--start-block', default=None, type=int, help='Start block.') @click.option('-c', '--chain', default=Chain.BITCOIN, type=click.Choice(Chain.ALL), help='The type of chain.') @click.option('--period-seconds', default=10, type=int, help='How many seconds to sleep between syncs.') @@ -62,9 +62,12 @@ def stream(last_synced_block_file, lag, provider_uri, output, topic_mapping, sta from bitcoinetl.streaming.btc_streamer_adapter import BtcStreamerAdapter from blockchainetl.streaming.streamer import Streamer + if (topic_mapping is not None): + topic_mapping = json.loads(topic_mapping) + streamer_adapter = BtcStreamerAdapter( bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)), - item_exporter=get_item_exporter(output,topic_mapping), + item_exporter=get_item_exporter(output,topic_mapping,chain), chain=chain, batch_size=batch_size, enable_enrich=enrich, diff --git a/bitcoinetl/streaming/streaming_utils.py b/bitcoinetl/streaming/streaming_utils.py index 0ba7e45..01d73dd 100644 --- a/bitcoinetl/streaming/streaming_utils.py +++ b/bitcoinetl/streaming/streaming_utils.py @@ -1,7 +1,7 @@ from blockchainetl.jobs.exporters.console_item_exporter import ConsoleItemExporter -def get_item_exporter(output,topic_mapping): +def get_item_exporter(output,topic_mapping,chain): item_exporter_type = determine_item_exporter_type(output) if item_exporter_type == ItemExporterType.PUBSUB: @@ -20,11 +20,11 @@ def get_item_exporter(output,topic_mapping): from blockchainetl.jobs.exporters.kafka_exporter import KafkaItemExporter if (topic_mapping is None): item_exporter = KafkaItemExporter(output, item_type_to_topic_mapping={ - 'block': 'producer.bitcoin.hot.blocks', - 'transaction': 'producer.bitcoin.hot.transactions', + 'block': f"producer-{chain}-blocks-hot", + 'transaction': f"producer-{chain}-transactions-hot", }) else: - item_exporter = KafkaItemExporter(output, item_type_to_topic_mapping =topic_mapping) + item_exporter = KafkaItemExporter(output, item_type_to_topic_mapping=topic_mapping) else: raise ValueError('Unable to determine item exporter type for output ' + output) diff --git a/blockchainetl/jobs/exporters/kafka_exporter.py b/blockchainetl/jobs/exporters/kafka_exporter.py index 22a77f9..5ffdf70 100644 --- a/blockchainetl/jobs/exporters/kafka_exporter.py +++ b/blockchainetl/jobs/exporters/kafka_exporter.py @@ -3,7 +3,6 @@ import logging import os import socket - from confluent_kafka import Producer from blockchainetl.jobs.exporters.converters.composite_item_converter import CompositeItemConverter @@ -15,7 +14,7 @@ class KafkaItemExporter: def __init__(self, output, item_type_to_topic_mapping, converters=()): self.item_type_to_topic_mapping = item_type_to_topic_mapping self.converter = CompositeItemConverter(converters) - self.connection_url = self.get_connection_url(output) + # self.connection_url = self.get_connection_url(output) # print(self.connection_url) conf = { "bootstrap.servers": os.getenv("CONFLUENT_BROKER"), @@ -23,8 +22,9 @@ def __init__(self, output, item_type_to_topic_mapping, converters=()): "sasl.mechanisms": "PLAIN", "client.id": socket.gethostname(), "message.max.bytes": 5242880, - "sasl.username": os.getenv("CONFLUENT_USERNAME"), - "sasl.password": os.getenv("CONFLUENT_PASSWORD") + "sasl.username": os.getenv("KAFKA_PRODUCER_KEY"), + "sasl.password": os.getenv("KAFKA_PRODUCER_PASSWORD"), + "queue.buffering.max.messages": 10000000, } self.producer = Producer(conf) @@ -38,11 +38,6 @@ def get_connection_url(self, output): def open(self): pass - def acked(err, msg): - if err is not None: - logging.error("Failed to deliver message: %s: %s" % (str(msg), str(err))) - else: - logging.debug("Message produced: %s" % (str(msg))) def export_items(self, items): for item in items: @@ -66,12 +61,15 @@ def export_item(self, item, item_type): def write_to_kafka(self, value: str, topic: str): + # def acked(err, msg): + # if err is not None: + # self.logging.error('%% Message failed delivery: %s\n' % err) try: - self.producer.produce(topic,key="0x0000",value=value, callback=self.acked) + self.producer.produce(topic,key="0x0000",value=value) + self.producer.poll(0) except BufferError: - self.logging.error('%% Local producer queue is full (%d messages awaiting delivery): try again\n' % + logging.error('%% Local producer queue is full (%d messages awaiting delivery): try again\n' % len(self.producer)) - self.producer.poll(0) def convert_items(self, items): @@ -79,6 +77,7 @@ def convert_items(self, items): yield self.converter.convert_item(item) def close(self): + self.producer.flush() pass diff --git a/litecoin_last_synced_block.txt b/litecoin_last_synced_block.txt new file mode 100644 index 0000000..8b0df76 --- /dev/null +++ b/litecoin_last_synced_block.txt @@ -0,0 +1 @@ +2535320 From 69ba9f27c85d9bf84ac638be7d6b7e716dfd2062 Mon Sep 17 00:00:00 2001 From: Naveen Modi Date: Thu, 7 Sep 2023 16:49:07 +0530 Subject: [PATCH 51/51] updated the flatten logic --- blockchainetl/jobs/exporters/bitcoin_flatten.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/blockchainetl/jobs/exporters/bitcoin_flatten.py b/blockchainetl/jobs/exporters/bitcoin_flatten.py index 389331a..d685701 100644 --- a/blockchainetl/jobs/exporters/bitcoin_flatten.py +++ b/blockchainetl/jobs/exporters/bitcoin_flatten.py @@ -15,11 +15,11 @@ def flatten_transformation(payload_dict): for input in payload_dict["inputs"]: if not payload_dict["is_coinbase"]: if output["value"] > 0: - token_outgoing_value = Decimal((1e-8 * input["value"]) * (1e-8 * output["value"]) / (1e-8 * payload_dict["output_value"])) + token_outgoing_value = Decimal((input["value"]) * (output["value"]) / (payload_dict["output_value"])) else: - token_outgoing_value = Decimal((1e-8 * input["value"]) / payload_dict["output_count"]) + token_outgoing_value = Decimal((input["value"]) / payload_dict["output_count"]) if input["value"] > 0: - token_incoming_value = Decimal((1e-8 * input["value"]) * (1e-8 * output["value"]) / (1e-8 * payload_dict["input_value"])) + token_incoming_value = Decimal((input["value"]) * (output["value"]) / (payload_dict["input_value"])) else: token_incoming_value = 0 @@ -45,8 +45,8 @@ def flatten_transformation(payload_dict): "transaction_type": TYPE_BLOCK_REWARD, "sender_address": f"{NULL_ADDRESS_MINT}_{datetime.datetime.fromtimestamp(payload_dict['block_timestamp']).month}", "receiver_address": "|".join(output["addresses"]), - "token_outgoing_value": str(1e-8 * output["value"]), - "token_incoming_value": str(1e-8 * output["value"]), + "token_outgoing_value": str(output["value"]), + "token_incoming_value": str(output["value"]), "token_address": default_token_address, "token_outgoing_fee": str(0) })