Merge remote-tracking branch 'upstream/main' into pit_fix

microsoft · Sep 3, 2024 · 958291e · 958291e
2 parents e42496a + b45b006
commit 958291e
Show file tree

Hide file tree

Showing 73 changed files with 1,838 additions and 477 deletions.
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
@@ -51,8 +51,8 @@ jobs:
         python setup.py bdist_wheel
     - name: Build and publish
       env:
-        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
-        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+        TWINE_USERNAME: __token__
+        TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
       run: |
         twine upload dist/*
 
@@ -72,10 +72,10 @@ jobs:
         python-version: 3.7
     - name: Install dependencies
       run: |
-        pip install twine  
+        pip install twine
     - name: Build and publish
       env:
-        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
-        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+        TWINE_USERNAME: __token__
+        TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
       run: |
         twine upload dist/pyqlib-*-manylinux*.whl
diff --git a/.github/workflows/release-drafter.yml b/.github/workflows/release-drafter.yml
@@ -6,8 +6,14 @@ on:
     branches:
       - main
 
+permissions:
+  contents: read
+
 jobs:
   update_release_draft:
+    permissions:
+      contents: write
+      pull-requests: read
     runs-on: ubuntu-latest
     steps:
       # Drafts your next Release notes as Pull Requests are merged into "master"

diff --git a/.github/workflows/test_qlib_from_pip.yml b/.github/workflows/test_qlib_from_pip.yml
@@ -13,7 +13,10 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest]
+        # Since macos-latest changed from 12.7.4 to 14.4.1,
+        # the minimum python version that matches a 14.4.1 version of macos is 3.10,
+        # so we limit the macos version to macos-12.
+        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-12]
         # not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
         python-version: [3.7, 3.8]
 
@@ -42,6 +45,9 @@ jobs:
 
     - name: Qlib installation test
       run: |
+        # On 2024-05-30 scs released a new version (3.2.4.post2)
+        # that causes the CI to fail, so we limit the version of scs for now.
+        python -m pip install "scs<=3.2.4"
         python -m pip install pyqlib
 
     - name: Install Lightgbm for MacOS
@@ -62,5 +68,8 @@ jobs:
         cd qlib
 
     - name: Test workflow by config
+      # On macos-11 system, it will lead to "Segmentation fault: 11" error,
+      # which may be caused by the excessive memory overhead of macos-11 system, so we disable macos-11 temporarily here.
+      if: ${{ matrix.os != 'macos-11' }}
       run: |
         qrun examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml
diff --git a/.github/workflows/test_qlib_from_source.yml b/.github/workflows/test_qlib_from_source.yml
@@ -14,7 +14,10 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest]
+        # Since macos-latest changed from 12.7.4 to 14.4.1,
+        # the minimum python version that matches a 14.4.1 version of macos is 3.10,
+        # so we limit the macos version to macos-12.
+        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-12]
         # not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
         python-version: [3.7, 3.8]
 
@@ -69,8 +72,10 @@ jobs:
         black . -l 120 --check --diff
 
     - name: Make html with sphinx
+      # Since read the docs builds on ubuntu 22.04, we only need to test that the build passes on ubuntu 22.04.
+      if: ${{ matrix.os == 'ubuntu-22.04' }}
       run: |
-        cd docs 
+        cd docs
         sphinx-build -W --keep-going -b html . _build
         cd ..
 
@@ -156,11 +161,16 @@ jobs:
 
     # Run after data downloads
     - name: Check Qlib ipynb with nbconvert
+      # Running the nbconvert check on a macos-11 system results in a "Kernel died" error, so we've temporarily disabled macos-11 here.
+      if: ${{ matrix.os != 'macos-11' }}
       run: |
         # add more ipynb files in future
         jupyter nbconvert --to notebook --execute examples/workflow_by_code.ipynb
 
     - name: Test workflow by config (install from source)
+      # On macos-11 system, it will lead to "Segmentation fault: 11" error,
+      # which may be caused by the excessive memory overhead of macos-11 system, so we disable macos-11 temporarily here.
+      if: ${{ matrix.os != 'macos-11' }}
       run: |
         python -m pip install numba
         python qlib/workflow/cli.py examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158.yaml

diff --git a/.github/workflows/test_qlib_from_source_slow.yml b/.github/workflows/test_qlib_from_source_slow.yml
@@ -14,7 +14,10 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-11, macos-latest]
+        # Since macos-latest changed from 12.7.4 to 14.4.1,
+        # the minimum python version that matches a 14.4.1 version of macos is 3.10,
+        # so we limit the macos version to macos-12.
+        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-12]
         # not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
         python-version: [3.7, 3.8]
 

diff --git a/.gitignore b/.gitignore
@@ -49,4 +49,4 @@ tags
 *.swp
 
 ./pretrain
-.idea/
+.idea/
diff --git a/.readthedocs.yml → .readthedocs.yaml b/.readthedocs.yml → .readthedocs.yaml
@@ -5,6 +5,12 @@
 # Required
 version: 2
 
+# Set the version of Python and other tools you might need
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.7"
+
 # Build documentation in the docs/ directory with Sphinx
 sphinx:
   configuration: docs/conf.py
@@ -14,7 +20,6 @@ formats: all
 
 # Optionally set the version of Python and requirements required to build your docs
 python:
-  version: 3.7
   install:
     - requirements: docs/requirements.txt
     - method: pip

diff --git a/README.md b/README.md
@@ -11,6 +11,7 @@
 Recent released features
 | Feature | Status |
 | --                      | ------    |
+| 🔥LLM-driven Auto Quant Factory🔥 | 🚀 Released in [♾️RD-Agent](https://github.com/microsoft/RD-Agent) on Aug 8, 2024 |
 | KRNN and Sandwich models | :chart_with_upwards_trend: [Released](https://github.com/microsoft/qlib/pull/1414/) on May 26, 2023 |
 | Release Qlib v0.9.0 | :octocat: [Released](https://github.com/microsoft/qlib/releases/tag/v0.9.0) on Dec 9, 2022 |
 | RL Learning Framework | :hammer: :chart_with_upwards_trend: Released on Nov 10, 2022. [#1332](https://github.com/microsoft/qlib/pull/1332), [#1322](https://github.com/microsoft/qlib/pull/1322), [#1316](https://github.com/microsoft/qlib/pull/1316),[#1299](https://github.com/microsoft/qlib/pull/1299),[#1263](https://github.com/microsoft/qlib/pull/1263), [#1244](https://github.com/microsoft/qlib/pull/1244), [#1169](https://github.com/microsoft/qlib/pull/1169), [#1125](https://github.com/microsoft/qlib/pull/1125), [#1076](https://github.com/microsoft/qlib/pull/1076)|
@@ -40,7 +41,7 @@ Recent released features
 Features released before 2021 are not listed here.
 
 <p align="center">
-  <img src="http://fintech.msra.cn/images_v070/logo/1.png" />
+  <img src="docs/_static/img/logo/1.png" />
 </p>
 
 Qlib is an open-source, AI-oriented quantitative investment platform that aims to realize the potential, empower research, and create value using AI technologies in quantitative investment, from exploring ideas to implementing productions. Qlib supports diverse machine learning modeling paradigms, including supervised learning, market dynamics modeling, and reinforcement learning.
@@ -166,13 +167,29 @@ Also, users can install the latest dev version ``Qlib`` by the source code accor
 * Clone the repository and install ``Qlib`` as follows.
     ```bash
     git clone https://github.com/microsoft/qlib.git && cd qlib
-    pip install .
+    pip install .  # `pip install -e ".[dev]"` is recommended for development. See docs/developer/code_standard_and_dev_guide.rst for details.
     ```
   **Note**:  You can install Qlib with `python setup.py install` as well. But it is not the recommended approach. It will skip `pip` and cause obscure problems. For example, **only** the command ``pip install .`` **can** overwrite the stable version installed by ``pip install pyqlib``, while the command ``python setup.py install`` **can't**.
 
 **Tips**: If you fail to install `Qlib` or run the examples in your environment,  comparing your steps and the [CI workflow](.github/workflows/test_qlib_from_source.yml) may help you find the problem.
 
+**Tips for Mac**: If you are using Mac with M1, you might encounter issues in building the wheel for LightGBM, which is due to missing dependencies from OpenMP. To solve the problem, install openmp first with ``brew install libomp`` and then run ``pip install .`` to build it successfully. 
+
 ## Data Preparation
+❗ Due to a stricter data security policy, the official dataset is temporarily disabled. You can try [this data source](https://github.com/chenditc/investment_data/releases) contributed by the community.
+Here is an example to download the data updated on 20240809.
+```bash
+wget https://github.com/chenditc/investment_data/releases/download/2024-08-09/qlib_bin.tar.gz
+mkdir -p ~/.qlib/qlib_data/cn_data
+tar -zxvf qlib_bin.tar.gz -C ~/.qlib/qlib_data/cn_data --strip-components=1
+rm -f qlib_bin.tar.gz
+```
+
+The official dataset below will be restored in the near future.
+
+
+----
+
 Load and prepare data by running the following code:
 
 ### Get with module
@@ -292,19 +309,19 @@ Qlib provides a tool named `qrun` to run the whole workflow automatically (inclu
 2. Graphical Reports Analysis: Run `examples/workflow_by_code.ipynb` with `jupyter notebook` to get graphical reports
     - Forecasting signal (model prediction) analysis
       - Cumulative Return of groups
-      ![Cumulative Return](http://fintech.msra.cn/images_v070/analysis/analysis_model_cumulative_return.png?v=0.1)
+      ![Cumulative Return](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/analysis_model_cumulative_return.png)
       - Return distribution
-      ![long_short](http://fintech.msra.cn/images_v070/analysis/analysis_model_long_short.png?v=0.1)
+      ![long_short](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/analysis_model_long_short.png)
       - Information Coefficient (IC)
-      ![Information Coefficient](http://fintech.msra.cn/images_v070/analysis/analysis_model_IC.png?v=0.1)
-      ![Monthly IC](http://fintech.msra.cn/images_v070/analysis/analysis_model_monthly_IC.png?v=0.1)
-      ![IC](http://fintech.msra.cn/images_v070/analysis/analysis_model_NDQ.png?v=0.1)
+      ![Information Coefficient](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/analysis_model_IC.png)
+      ![Monthly IC](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/analysis_model_monthly_IC.png)
+      ![IC](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/analysis_model_NDQ.png)
       - Auto Correlation of forecasting signal (model prediction)
-      ![Auto Correlation](http://fintech.msra.cn/images_v070/analysis/analysis_model_auto_correlation.png?v=0.1)
+      ![Auto Correlation](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/analysis_model_auto_correlation.png)
 
     - Portfolio analysis
       - Backtest return
-      ![Report](http://fintech.msra.cn/images_v070/analysis/report.png?v=0.1)
+      ![Report](https://github.com/microsoft/qlib/blob/main/docs/_static/img/analysis/report.png)
       <!-- 
       - Score IC
       ![Score IC](docs/_static/img/score_ic.png)
@@ -483,7 +500,7 @@ Qlib data are stored in a compact format, which is efficient to be combined into
 Join IM discussion groups:
 |[Gitter](https://gitter.im/Microsoft/qlib)|
 |----|
-|![image](http://fintech.msra.cn/images_v070/qrcode/gitter_qr.png)|
+|![image](https://github.com/microsoft/qlib/blob/main/docs/_static/img/qrcode/gitter_qr.png)|
 
 # Contributing
 We appreciate all contributions and thank all the contributors!

diff --git a/docs/component/model.rst b/docs/component/model.rst
@@ -86,7 +86,7 @@ Example
             },
         }
 
-        # model initiaiton
+        # model initialization
         model = init_instance_by_config(task["model"])
         dataset = init_instance_by_config(task["dataset"])
 

diff --git a/docs/developer/code_standard_and_dev_guide.rst b/docs/developer/code_standard_and_dev_guide.rst
@@ -60,4 +60,4 @@ The `[dev]` option will help you to install some related packages when developin
 
 .. code-block:: bash
 
-    pip install -e .[dev]
+    pip install -e ".[dev]"
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -5,3 +5,4 @@ scipy
 scikit-learn
 pandas
 tianshou
+sphinx_rtd_theme
diff --git a/examples/benchmarks/GeneralPtNN/README.md b/examples/benchmarks/GeneralPtNN/README.md
@@ -0,0 +1,19 @@
+
+
+# Introduction
+
+What is GeneralPtNN
+- It fixes the previous design, which failed to support both time-series and tabular data.
+- Now you can just replace the PyTorch model structure to run an NN model.
+
+We provide an example to demonstrate the effectiveness of the current design.
+- `workflow_config_gru.yaml` aligns with the previous results of [GRU(Kyunghyun Cho, et al.)](../README.md#Alpha158-dataset)
+  - `workflow_config_gru2mlp.yaml` demonstrates that we can convert a config from time-series to tabular data with minimal changes
+    - You only have to change the net & dataset class to make the conversion.
+- `workflow_config_mlp.yaml` achieves functionality similar to [MLP](../README.md#Alpha158-dataset)
+
+# TODO
+
+- We will align existing models to current design.
+
+- The result of `workflow_config_mlp.yaml` is different from the result of [MLP](../README.md#Alpha158-dataset) since GeneralPtNN has a different stopping method compared to previous implementations. Specifically, GeneralPtNN controls training according to epochs, whereas previous methods were controlled by max_steps.
diff --git a/examples/benchmarks/GeneralPtNN/workflow_config_gru.yaml b/examples/benchmarks/GeneralPtNN/workflow_config_gru.yaml
@@ -0,0 +1,100 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: FilterCol
+          kwargs:
+              fields_group: feature
+              col_list: ["RESI5", "WVMA5", "RSQR5", "KLEN", "RSQR10", "CORR5", "CORD5", "CORR10", 
+                            "ROC60", "RESI10", "VSTD5", "RSQR60", "CORR60", "WVMA60", "STD5", 
+                            "RSQR20", "CORD60", "CORD10", "CORR20", "KLOW"
+                        ]
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"] 
+
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy
+        kwargs:
+            signal: <PRED>
+            topk: 50
+            n_drop: 5
+    backtest:
+        start_time: 2017-01-01
+        end_time: 2020-08-01
+        account: 100000000
+        benchmark: *benchmark
+        exchange_kwargs:
+            limit_threshold: 0.095
+            deal_price: close
+            open_cost: 0.0005
+            close_cost: 0.0015
+            min_cost: 5
+task:
+    model:
+        class: GeneralPTNN
+        module_path: qlib.contrib.model.pytorch_general_nn
+        kwargs:
+            n_epochs: 200
+            lr: 2e-4
+            early_stop: 10
+            batch_size: 800
+            metric: loss
+            loss: mse
+            n_jobs: 20
+            GPU: 0
+            pt_model_uri: "qlib.contrib.model.pytorch_gru_ts.GRUModel"
+            pt_model_kwargs: {
+                "d_feat": 20,
+                "hidden_size": 64,
+                "num_layers": 2,
+                "dropout": 0.,
+            }
+    dataset:
+        class: TSDatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+            step_len: 20
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            model: <MODEL>
+            dataset: <DATASET>
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config
-Original file line number
+Diff line change
@@ Expand Up / @@ -49,4 +49,4 @@ tags @@
     *.swp
     ./pretrain
-    .idea/
+    .idea/
Original file line number	Diff line number	Diff line change
Expand Up		@@ -60,4 +60,4 @@ The `[dev]` option will help you to install some related packages when developin

		.. code-block:: bash

		pip install -e .[dev]
		pip install -e ".[dev]"