Skip to content

Commit

Permalink
Merge in staging
Browse files Browse the repository at this point in the history
Signed-off-by: Simon Zhao <[email protected]>
  • Loading branch information
SimonYansenZhao committed Nov 12, 2024
2 parents 4a544b2 + 12bc1e4 commit 450bf17
Show file tree
Hide file tree
Showing 22 changed files with 215 additions and 164 deletions.
28 changes: 0 additions & 28 deletions .devcontainer/Dockerfile

This file was deleted.

88 changes: 47 additions & 41 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,44 +1,50 @@
{
"name": "Recommenders",
"build": {
"dockerfile": "Dockerfile",
"context": "..",
"args": {
// Python version: 3, 3.6, 3.7
"PYTHON_VERSION": "3.7",
"REMOTE_USER": "vscode"
}
},
"name": "Recommenders",
// Version list: https://github.com/devcontainers/images/tree/main/src/base-ubuntu
// Includes: curl, wget, ca-certificates, git, Oh My Zsh!
"image": "mcr.microsoft.com/devcontainers/base:ubuntu-24.04",
"hostRequirements": {
"cpus": 4,
"memory": "16gb",
"storage": "32gb"
},
"features": {
// https://github.com/devcontainers/features/blob/main/src/anaconda/devcontainer-feature.json
"ghcr.io/devcontainers/features/anaconda:1": {
"version": "2024.06-1"
}
},
"customizations": {
"vscode": {
// Set *default* container specific settings.json values on container create.
"settings": {
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit"
}
},
"isort.args": ["--profile", "black"],
"python.analysis.autoImportCompletions": true,
"python.defaultInterpreterPath": "/usr/local/conda/envs/Recommenders/bin/python",
"python.testing.pytestEnabled": true,
// set the directory where all tests are
"python.testing.pytestArgs": ["tests"]
},
// Add the IDs of extensions you want installed when the container is created.
"extensions": [
"ms-python.black-formatter", // https://marketplace.visualstudio.com/items?itemName=ms-python.black-formatter
"ms-python.isort", // https://marketplace.visualstudio.com/items?itemName=ms-python.isort
"ms-python.mypy-type-checker", // https://marketplace.visualstudio.com/items?itemName=ms-python.mypy-type-checker
"ms-python.pylint", // https://marketplace.visualstudio.com/items?itemName=ms-python.pylint
"ms-python.python", // https://marketplace.visualstudio.com/items?itemName=ms-python.python
"ms-toolsai.datawrangler", // https://marketplace.visualstudio.com/items?itemName=ms-toolsai.datawrangler
"ms-toolsai.jupyter" // https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter
]
}
},

// Set *default* container specific settings.json values on container create.
"settings": {
"python.pythonPath": "/usr/local/bin/python",
"python.languageServer": "Pylance",
"python.linting.enabled": true,
"python.linting.pylintEnabled": true,
"python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8",
"python.formatting.blackPath": "/usr/local/py-utils/bin/black",
"python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf",
"python.linting.banditPath": "/usr/local/py-utils/bin/bandit",
"python.linting.flake8Path": "/usr/local/py-utils/bin/flake8",
"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
"python.linting.pylintPath": "/usr/local/py-utils/bin/pylint"
},

// Add the IDs of extensions you want installed when the container is created.
"extensions": [
"ms-python.python",
"ms-python.vscode-pylance"
],

// Use 'forwardPorts' to make a list of ports inside the container available locally.
"forwardPorts": [8888],

// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "pip install -U pip && pip install --user -e .[dev,examples,spark,xlearn]",

// Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
"remoteUser": "vscode"
// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "conda create -n Recommenders -c conda-forge -y python=3.10 openjdk=21 pip && conda init bash && bash -c -i 'conda activate Recommenders && pip install -e .[dev,spark]' && conda config --set auto_activate_base false"
}
14 changes: 11 additions & 3 deletions .github/actions/azureml-test/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,15 @@ inputs:
TEST_KIND:
required: true
description: Type of test - unit or nightly
AZUREML_TEST_CREDENTIALS:
AZUREML_TEST_UMI_CLIENT_ID:
required: true
description: Credentials for AzureML login
description: AzureML User-managed identity client ID
AZUREML_TEST_UMI_TENANT_ID:
required: true
description: AzureML User-managed identity tenant ID
AZUREML_TEST_UMI_SUB_ID:
required: true
description: AzureML User-managed identity subscription ID
AZUREML_TEST_SUBID:
required: true
description: AzureML subscription ID
Expand Down Expand Up @@ -53,7 +59,9 @@ runs:
- name: Log in to Azure
uses: azure/login@v2
with:
creds: ${{ inputs.AZUREML_TEST_CREDENTIALS }}
client-id: ${{ inputs.AZUREML_TEST_UMI_CLIENT_ID }}
tenant-id: ${{ inputs.AZUREML_TEST_UMI_TENANT_ID }}
subscription-id: ${{ inputs.AZUREML_TEST_UMI_SUB_ID }}
- name: Submit tests to AzureML
shell: bash
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/actions/get-test-groups/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ description: "Get test group names from tests_groups.py"
inputs:
TEST_KIND:
required: true
description: Type of test - unit or nightly
description: Type of test - pr gate or nightly
TEST_ENV:
required: false
description: Test environment - cpu, gpu or spark
Expand Down
6 changes: 5 additions & 1 deletion .github/workflows/azureml-cpu-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ jobs:
needs: get-test-groups
name: ${{ join(matrix.*, ', ') }}
runs-on: ubuntu-latest
permissions:
id-token: write # This is required for requesting the JWT
strategy:
max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
matrix:
Expand All @@ -79,7 +81,9 @@ jobs:
EXP_NAME: recommenders-nightly-${{ matrix.test-group }}-python${{ matrix.python-version }}-${{ github.ref }}
ENV_NAME: recommenders-${{ github.sha }}-python${{ matrix.python-version }}${{ contains(matrix.test-group, 'gpu') && '-gpu' || '' }}${{ contains(matrix.test-group, 'spark') && '-spark' || '' }}
TEST_KIND: 'nightly'
AZUREML_TEST_CREDENTIALS: ${{ secrets.AZUREML_TEST_CREDENTIALS }}
AZUREML_TEST_UMI_CLIENT_ID: ${{ secrets.AZUREML_TEST_UMI_CLIENT_ID }}
AZUREML_TEST_UMI_TENANT_ID: ${{ secrets.AZUREML_TEST_UMI_TENANT_ID }}
AZUREML_TEST_UMI_SUB_ID: ${{ secrets.AZUREML_TEST_UMI_SUB_ID }}
AZUREML_TEST_SUBID: ${{ secrets.AZUREML_TEST_SUBID }}
PYTHON_VERSION: ${{ matrix.python-version }}
TEST_GROUP: ${{ matrix.test-group }}
6 changes: 5 additions & 1 deletion .github/workflows/azureml-gpu-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ jobs:
needs: get-test-groups
name: ${{ join(matrix.*, ', ') }}
runs-on: ubuntu-latest
permissions:
id-token: write # This is required for requesting the JWT
strategy:
max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
matrix:
Expand All @@ -79,7 +81,9 @@ jobs:
EXP_NAME: recommenders-nightly-${{ matrix.test-group }}-python${{ matrix.python-version }}-${{ github.ref }}
ENV_NAME: recommenders-${{ github.sha }}-python${{ matrix.python-version }}${{ contains(matrix.test-group, 'gpu') && '-gpu' || '' }}${{ contains(matrix.test-group, 'spark') && '-spark' || '' }}
TEST_KIND: 'nightly'
AZUREML_TEST_CREDENTIALS: ${{ secrets.AZUREML_TEST_CREDENTIALS }}
AZUREML_TEST_UMI_CLIENT_ID: ${{ secrets.AZUREML_TEST_UMI_CLIENT_ID }}
AZUREML_TEST_UMI_TENANT_ID: ${{ secrets.AZUREML_TEST_UMI_TENANT_ID }}
AZUREML_TEST_UMI_SUB_ID: ${{ secrets.AZUREML_TEST_UMI_SUB_ID }}
AZUREML_TEST_SUBID: ${{ secrets.AZUREML_TEST_SUBID }}
PYTHON_VERSION: ${{ matrix.python-version }}
TEST_GROUP: ${{ matrix.test-group }}
6 changes: 5 additions & 1 deletion .github/workflows/azureml-spark-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ jobs:
needs: get-test-groups
name: ${{ join(matrix.*, ', ') }}
runs-on: ubuntu-latest
permissions:
id-token: write # This is required for requesting the JWT
strategy:
max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
matrix:
Expand All @@ -78,7 +80,9 @@ jobs:
EXP_NAME: recommenders-nightly-${{ matrix.test-group }}-python${{ matrix.python-version }}-${{ github.ref }}
ENV_NAME: recommenders-${{ github.sha }}-python${{ matrix.python-version }}${{ contains(matrix.test-group, 'gpu') && '-gpu' || '' }}${{ contains(matrix.test-group, 'spark') && '-spark' || '' }}
TEST_KIND: 'nightly'
AZUREML_TEST_CREDENTIALS: ${{ secrets.AZUREML_TEST_CREDENTIALS }}
AZUREML_TEST_UMI_CLIENT_ID: ${{ secrets.AZUREML_TEST_UMI_CLIENT_ID }}
AZUREML_TEST_UMI_TENANT_ID: ${{ secrets.AZUREML_TEST_UMI_TENANT_ID }}
AZUREML_TEST_UMI_SUB_ID: ${{ secrets.AZUREML_TEST_UMI_SUB_ID }}
AZUREML_TEST_SUBID: ${{ secrets.AZUREML_TEST_SUBID }}
PYTHON_VERSION: ${{ matrix.python-version }}
TEST_GROUP: ${{ matrix.test-group }}
6 changes: 5 additions & 1 deletion .github/workflows/azureml-unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ jobs:
needs: get-test-groups
name: ${{ join(matrix.*, ', ') }}
runs-on: ubuntu-latest
permissions:
id-token: write # This is required for requesting the JWT
strategy:
max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
matrix:
Expand All @@ -68,7 +70,9 @@ jobs:
EXP_NAME: recommenders-unit-${{ matrix.test-group }}-python${{ matrix.python-version }}-${{ github.sha }}
ENV_NAME: recommenders-${{ github.sha }}-python${{ matrix.python-version }}${{ contains(matrix.test-group, 'gpu') && '-gpu' || '' }}${{ contains(matrix.test-group, 'spark') && '-spark' || '' }}
TEST_KIND: 'unit'
AZUREML_TEST_CREDENTIALS: ${{ secrets.AZUREML_TEST_CREDENTIALS }}
AZUREML_TEST_UMI_CLIENT_ID: ${{ secrets.AZUREML_TEST_UMI_CLIENT_ID }}
AZUREML_TEST_UMI_TENANT_ID: ${{ secrets.AZUREML_TEST_UMI_TENANT_ID }}
AZUREML_TEST_UMI_SUB_ID: ${{ secrets.AZUREML_TEST_UMI_SUB_ID }}
AZUREML_TEST_SUBID: ${{ secrets.AZUREML_TEST_SUBID }}
PYTHON_VERSION: ${{ matrix.python-version }}
TEST_GROUP: ${{ matrix.test-group }}
Expand Down
2 changes: 2 additions & 0 deletions AUTHORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ To contributors: please add your name to the list when you submit a patch to the
* **[Aaron He](https://github.com/AaronHeee)**
* Reco utils of NCF
* Deep dive notebook demonstrating the use of NCF
* **[Aaron Palpallatoc](https://github.com/ubergonmx)**
* Corrected variable in pickle dump in `mind_utils.ipynb` notebook
* **[Abir Chakraborty](https://github.com/aeroabir)**
* Self-Attentive Sequential Recommendation (SASRec)
* Sequential Recommendation Via Personalized Transformer (SSEPT)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ We provide a [benchmark notebook](examples/06_benchmarks/movielens.ipynb) to ill

This project welcomes contributions and suggestions. Before contributing, please see our [contribution guidelines](CONTRIBUTING.md).

This project adheres to [Microsoft's Open Source Code of Conduct](CODE_OF_CONDUCT.md) in order to foster a welcoming and inspiring community for all.
This project adheres to this [Code of Conduct](CODE_OF_CONDUCT.md) in order to foster a welcoming and inspiring community for all.

## Build Status

Expand Down
11 changes: 7 additions & 4 deletions SETUP.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,16 +50,19 @@ pip install recommenders[spark]
# c. Run the notebook.
```

## Setup for Azure Databricks
## Setup for Databricks

The following instructions were tested on Azure Databricks Runtime 12.2 LTS (Apache Spark version 3.3.2) and 11.3 LTS (Apache Spark version 3.3.0).
As of April 2023, Databricks Runtime 13 is not yet supported as it is on Python 3.10.
The following instructions were tested on Databricks Runtime 15.4 LTS (Apache Spark version 3.5.0), 14.3 LTS (Apache Spark version 3.5.0), 13.3 LTS (Apache Spark version 3.4.1), and 12.2 LTS (Apache Spark version 3.3.2). We have tested the runtime on Python 3.9, 3.10, and 3.11.

After an Azure Databricks cluster is provisioned:
After a Databricks cluster is provisioned:
```bash
# 1. Go to the "Compute" tab on the left of the page, click on the provisioned cluster and then click on "Libraries".
# 2. Click the "Install new" button.
# 3. In the popup window, select "PyPI" as the library source. Enter "recommenders[examples]" as the package name. Click "Install" to install the package.
# 4. Now, repeat step 3 for the packages below:
# a. numpy<2.0.0
# b. pandera<=0.18.3
# c. scipy<=1.13.1
```

### Prepare Azure Databricks for Operationalization
Expand Down
2 changes: 1 addition & 1 deletion examples/01_prepare_data/mind_utils.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@
" pickle.dump(word_dict, f)\n",
" \n",
"with open(os.path.join(output_path, 'word_dict_all.pkl'), 'wb') as f:\n",
" pickle.dump(word_dict, f)"
" pickle.dump(word_dict_all, f)"
]
},
{
Expand Down
35 changes: 23 additions & 12 deletions recommenders/datasets/mind.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,6 @@
)


URL_MIND_LARGE_TRAIN = (
"https://mind201910small.blob.core.windows.net/release/MINDlarge_train.zip"
)
URL_MIND_LARGE_VALID = (
"https://mind201910small.blob.core.windows.net/release/MINDlarge_dev.zip"
)
URL_MIND_SMALL_TRAIN = (
"https://mind201910small.blob.core.windows.net/release/MINDsmall_train.zip"
)
URL_MIND_SMALL_VALID = (
"https://mind201910small.blob.core.windows.net/release/MINDsmall_dev.zip"
)
URL_MIND_DEMO_TRAIN = (
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip"
)
Expand All @@ -39,6 +27,29 @@
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_utils.zip"
)

URL_MIND_SMALL_TRAIN = (
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip"
)
URL_MIND_SMALL_VALID = (
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip"
)
URL_MIND_SMALL_UTILS = (
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_utils.zip"
)

URL_MIND_LARGE_TRAIN = (
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip"
)
URL_MIND_LARGE_VALID = (
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip"
)
URL_MIND_LARGE_TEST = (
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_test.zip"
)
URL_MIND_LARGE_UTILS = (
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_utils.zip"
)

URL_MIND = {
"large": (URL_MIND_LARGE_TRAIN, URL_MIND_LARGE_VALID),
"small": (URL_MIND_SMALL_TRAIN, URL_MIND_SMALL_VALID),
Expand Down
2 changes: 2 additions & 0 deletions recommenders/models/deeprec/DataModel/ImplicitCF.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,8 @@ def train_loader(self, batch_size):
"""

def sample_neg(x):
if len(x) >= self.n_items:
raise ValueError("A user has voted in every item. Can't find a negative sample.")
while True:
neg_id = random.randint(0, self.n_items - 1)
if neg_id not in x:
Expand Down
4 changes: 2 additions & 2 deletions recommenders/models/newsrec/newsrec_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,15 +310,15 @@ def get_mind_data_set(type):

if type == "large":
return (
"https://mind201910small.blob.core.windows.net/release/",
"https://recodatasets.z20.web.core.windows.net/newsrec/",
"MINDlarge_train.zip",
"MINDlarge_dev.zip",
"MINDlarge_utils.zip",
)

elif type == "small":
return (
"https://mind201910small.blob.core.windows.net/release/",
"https://recodatasets.z20.web.core.windows.net/newsrec/",
"MINDsmall_train.zip",
"MINDsmall_dev.zip",
"MINDsmall_utils.zip",
Expand Down
Loading

0 comments on commit 450bf17

Please sign in to comment.