From 47bd13295b48c591ae5d4b864690f7afd6e30d42 Mon Sep 17 00:00:00 2001 From: Fivele-Li <128388363+Fivele-Li@users.noreply.github.com> Date: Fri, 21 Jun 2024 11:22:23 +0800 Subject: [PATCH] Fix Yahoo daily data format inconsistent (#1517) * Fix FutureWarning: Passing unit-less datetime64 dtype to .astype is deprecated and will raise in a future version. Pass 'datetime64[ns]' instead * align index format while end date contains current day data * fix black * fix black * optimize code * optimize code * optimize code * fix ci error * check ci error * fix ci error * check ci error * check ci error * check ci error * check ci error * check ci error * check ci error * fix ci error * fix ci error * fix ci error * fix ci error * fix ci error --------- Co-authored-by: Cadenza-Li <362237642@qq.com> Co-authored-by: Linlang --- .github/workflows/test_qlib_from_pip.yml | 3 +++ .github/workflows/test_qlib_from_source.yml | 9 ++++++++- scripts/data_collector/yahoo/collector.py | 3 +++ setup.py | 4 ++-- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_qlib_from_pip.yml b/.github/workflows/test_qlib_from_pip.yml index 4b9fa7c34d..fd1e8c4cf4 100644 --- a/.github/workflows/test_qlib_from_pip.yml +++ b/.github/workflows/test_qlib_from_pip.yml @@ -68,5 +68,8 @@ jobs: cd qlib - name: Test workflow by config + # On macos-11 system, it will lead to "Segmentation fault: 11" error, + # which may be caused by the excessive memory overhead of macos-11 system, so we disable macos-11 temporarily here. + if: ${{ matrix.os != 'macos-11' }} run: | qrun examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml diff --git a/.github/workflows/test_qlib_from_source.yml b/.github/workflows/test_qlib_from_source.yml index 38f32da8ed..885d8fa439 100644 --- a/.github/workflows/test_qlib_from_source.yml +++ b/.github/workflows/test_qlib_from_source.yml @@ -72,8 +72,10 @@ jobs: black . -l 120 --check --diff - name: Make html with sphinx + # Since read the docs builds on ubuntu 22.04, we only need to test that the build passes on ubuntu 22.04. + if: ${{ matrix.os == 'ubuntu-22.04' }} run: | - cd docs + cd docs sphinx-build -W --keep-going -b html . _build cd .. @@ -159,11 +161,16 @@ jobs: # Run after data downloads - name: Check Qlib ipynb with nbconvert + # Running the nbconvert check on a macos-11 system results in a "Kernel died" error, so we've temporarily disabled macos-11 here. + if: ${{ matrix.os != 'macos-11' }} run: | # add more ipynb files in future jupyter nbconvert --to notebook --execute examples/workflow_by_code.ipynb - name: Test workflow by config (install from source) + # On macos-11 system, it will lead to "Segmentation fault: 11" error, + # which may be caused by the excessive memory overhead of macos-11 system, so we disable macos-11 temporarily here. + if: ${{ matrix.os != 'macos-11' }} run: | python -m pip install numba python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml diff --git a/scripts/data_collector/yahoo/collector.py b/scripts/data_collector/yahoo/collector.py index 25e2963883..d2fa0b06f7 100644 --- a/scripts/data_collector/yahoo/collector.py +++ b/scripts/data_collector/yahoo/collector.py @@ -796,6 +796,9 @@ def download_data( # get 1m data $ python collector.py download_data --source_dir ~/.qlib/stock_data/source --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1m """ + if self.interval == "1d" and pd.Timestamp(end) > pd.Timestamp(datetime.datetime.now().strftime("%Y-%m-%d")): + raise ValueError(f"end_date: {end} is greater than the current date.") + super(Run, self).download_data(max_collector_count, delay, start, end, check_data_length, limit_nums) def normalize_data( diff --git a/setup.py b/setup.py index 1feabd30c1..a0dc9962c6 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,7 @@ def get_version(rel_path: str) -> str: REQUIRED = [ "numpy>=1.12.0, <1.24", "pandas>=0.25.1", - "scipy>=1.0.0", + "scipy>=1.7.3", "requests>=2.18.0", "sacred>=0.7.4", "python-socketio", @@ -82,7 +82,7 @@ def get_version(rel_path: str) -> str: "dill", "dataclasses;python_version<'3.7'", "filelock", - "jinja2<3.1.0", # for passing the readthedocs workflow. + "jinja2", "gym", # Installing the latest version of protobuf for python versions below 3.8 will cause unit tests to fail. "protobuf<=3.20.1;python_version<='3.8'",