Skip to content

Commit

Permalink
Merge pull request #15 from Techtonique/install-ms
Browse files Browse the repository at this point in the history
remove dust
  • Loading branch information
thierrymoudiki authored Nov 2, 2023
2 parents 634077f + 8eeda5c commit 4fc2eb5
Show file tree
Hide file tree
Showing 8 changed files with 33 additions and 142 deletions.
7 changes: 5 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,12 @@ dist: clean ## builds source and wheel package
ls -l dist

install: clean ## install the package to the active Python's site-packages
python setup.py install
python3 -m pip install .

build-site: docs
cd docs&&mkdocs build
cp -rf docs/site/* ../../Pro_Website/Techtonique.github.io/mlsauce
cd ..
cd ..

run-examples: ## run all examples with one command
find examples -maxdepth 2 -name "*.py" -exec python3 {} \;
39 changes: 2 additions & 37 deletions examples/lasso_regressor.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,11 @@
import mlsauce as ms
import numpy as np
from sklearn.datasets import load_boston, load_diabetes
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from time import time
from os import chdir
from sklearn import metrics


#wd="/workspace/mlsauce/mlsauce/examples"
#
#chdir(wd)

import mlsauce as ms


# data 1
boston = load_boston()
X = boston.data
y = boston.target
# split data into training set and test set
np.random.seed(15029)
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2)

obj = ms.LassoRegressor(backend="cpu")
start = time()
obj.fit(X_train, y_train)
print(time()-start)
start = time()
print(np.sqrt(np.mean(np.square(obj.predict(X_test) - y_test))))
print(time()-start)
print(obj.beta)

# obj = ms.LassoRegressor(backend="gpu")
# start = time()
# obj.fit(X_train, y_train)
# print(time()-start)
# start = time()
# print(np.sqrt(np.mean(np.square(obj.predict(X_test) - y_test))))
# print(time()-start)



# data 2
diabetes = load_diabetes()
X = diabetes.data
Expand Down
74 changes: 2 additions & 72 deletions examples/lsboost_regressor.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,18 @@
import mlsauce as ms
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston, load_diabetes
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from time import time
from os import chdir
from sklearn import metrics


#wd="/workspace/mlsauce/mlsauce/examples"
#
#chdir(wd)

import mlsauce as ms

# ridge

print("\n")
print("ridge -----")
print("\n")

# data 1
boston = load_boston()
X = boston.data
y = boston.target
# split data into training set and test set
np.random.seed(15029)
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2)

obj = ms.LSBoostRegressor(tolerance=5e-2, activation="relu6", col_sample=0.9, row_sample=0.9)
print(obj.get_params())
start = time()
obj.fit(X_train, y_train)
print(time()-start)
print(obj)
start = time()
print(np.sqrt(np.mean(np.square(obj.predict(X_test) - y_test))))
print(time()-start)

print(obj.obj['loss'])

# MORE DATA NEEDED # MORE DATA NEEDED # MORE DATA NEEDED
obj = ms.LSBoostRegressor(backend="gpu")
print(obj.get_params())
start = time()
obj.fit(X_train, y_train)
print(time()-start)
print(obj)
start = time()
print(np.sqrt(np.mean(np.square(obj.predict(X_test) - y_test))))
print(time()-start)

# data 2
diabetes = load_diabetes()
X = diabetes.data
Expand Down Expand Up @@ -89,38 +51,6 @@
print("lasso -----")
print("\n")

# data 1
boston = load_boston()
X = boston.data
y = boston.target
# split data into training set and test set
np.random.seed(15029)
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2)

obj = ms.LSBoostRegressor(solver="lasso")
print(obj.get_params())
start = time()
obj.fit(X_train, y_train)
print(time()-start)
print(obj)
start = time()
print(np.sqrt(np.mean(np.square(obj.predict(X_test) - y_test))))
print(time()-start)

print(obj.obj['loss'])

# MORE DATA NEEDED # MORE DATA NEEDED # MORE DATA NEEDED
# obj = ms.LSBoostRegressor(backend="gpu", solver="lasso")
# print(obj.get_params())
# start = time()
# obj.fit(X_train, y_train)
# print(time()-start)
# print(obj)
# start = time()
# print(np.sqrt(np.mean(np.square(obj.predict(X_test) - y_test))))
# print(time()-start)

# data 2
diabetes = load_diabetes()
X = diabetes.data
Expand Down
4 changes: 2 additions & 2 deletions mlsauce/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
from .lasso import LassoRegressor
from .ridge import RidgeRegressor
from .stump import StumpClassifier
from .encoders import corrtarget_encoder
#from .encoders import corrtarget_encoder

__all__ = [
"AdaOpt",
Expand All @@ -75,7 +75,7 @@
"LSBoostRegressor",
"RidgeRegressor",
# Other imports
"corrtarget_encoder",
#"corrtarget_encoder",
# Non-modules:
"get_config",
"set_config",
Expand Down
10 changes: 5 additions & 5 deletions mlsauce/booster/_boosterc.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ cdef struct mydoubletuple:

DTYPE_double = np.double

DTYPE_int = np.int
DTYPE_int = np.int32

ctypedef np.double_t DTYPE_double_t

Expand Down Expand Up @@ -158,7 +158,7 @@ def fit_booster_classifier(double[:,::1] X, long int[:] y,
np.random.seed(seed + iter*1000)

iy = np.sort(np.random.choice(a=range(p),
size=np.int(p*col_sample),
size=np.int32(p*col_sample),
replace=False),
kind='quicksort')
res['col_index_i'][iter] = iy
Expand All @@ -171,7 +171,7 @@ def fit_booster_classifier(double[:,::1] X, long int[:] y,
if row_sample < 1:

ix = np.sort(np.random.choice(a=range(n),
size=np.int(n*row_sample),
size=np.int32(n*row_sample),
replace=False),
kind='quicksort')
X_iy_ix = X_iy[ix,:]
Expand Down Expand Up @@ -302,7 +302,7 @@ def fit_booster_regressor(double[:,::1] X, double[:] y,
np.random.seed(seed + iter*1000)

iy = np.sort(np.random.choice(a=range(p),
size=np.int(p*col_sample),
size=np.int32(p*col_sample),
replace=False),
kind='quicksort')
res['col_index_i'][iter] = iy
Expand All @@ -315,7 +315,7 @@ def fit_booster_regressor(double[:,::1] X, double[:] y,
if row_sample < 1:

ix = np.sort(np.random.choice(a=range(n),
size=np.int(n*row_sample),
size=np.int32(n*row_sample),
replace=False),
kind='quicksort')
X_iy_ix = X_iy[ix,:]
Expand Down
2 changes: 1 addition & 1 deletion mlsauce/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def configuration(parent_package="", top_path=None):
# submodules which do not have their own setup.py
# we must manually add sub-submodules & tests
# config.add_subpackage("demo")
config.add_subpackage("encoders")
#config.add_subpackage("encoders")
config.add_subpackage("utils")
config.add_subpackage("utils/memoryuse")
config.add_subpackage("utils/misc")
Expand Down
14 changes: 1 addition & 13 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,25 +1,13 @@
absl-py==0.11.0
cycler==0.10.0
numpy>=1.12
Cython==0.29.21
flatbuffers==1.12
jax==0.2.9
jaxlib==0.1.60
joblib==1.0.0
kiwisolver==1.3.1
matplotlib==3.3.4
numpy>=1.12
opt-einsum==3.3.0
pandas==1.2.1
Pillow==9.0.1
pymongo==3.11.3
pyparsing==2.4.7
python-dateutil==2.8.1
pytz==2021.1
querier==0.4.0
scikit-learn==0.24.1
scipy==1.6.0
six==1.15.0
sklearn==0.0
SQLAlchemy==1.3.23
threadpoolctl==2.1.0
tqdm==4.56.0
25 changes: 15 additions & 10 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
# Copyright (C) 2020 T. Moudiki <[email protected]>
# License: 3-clause BSD

import subprocess
import sys
import os
import platform
import shutil
from distutils.command.clean import clean as Clean
from pkg_resources import parse_version
from setuptools import find_packages
import traceback
import importlib
try:
Expand All @@ -17,12 +19,10 @@
# Python 2 compat: just to be able to declare that Python >=3.5 is needed.
import __builtin__ as builtins

# This is a bit (!) hackish: we are setting a global variable so that the
# main mlsauce __init__ can detect if it is being loaded by the setup
# routine, to avoid attempting to load components that aren't built yet:
# the numpy distutils extensions that are used by mlsauce to
# recursively build the compiled extensions in sub-packages are based on the
# Python import machinery.
subprocess.run(['pip', 'install', 'numpy>= 1.13.0'])
subprocess.run(['pip', 'install', 'scipy>= 0.19.0'])
subprocess.run(['pip', 'install', 'Cython==0.29.21'])

builtins.__MLSAUCE_SETUP__ = True


Expand All @@ -37,7 +37,7 @@
# does not need the compiled code
import mlsauce

__version__ = '0.8.8'
__version__ = '0.8.9'

VERSION = __version__

Expand Down Expand Up @@ -230,17 +230,20 @@ def setup_package():
'scipy>={}'.format(SCIPY_MIN_VERSION),
'joblib>={}'.format(JOBLIB_MIN_VERSION),
'scikit-learn>={}'.format(SKLEARN_MIN_VERSION),
'threadpoolctl>={}'.format(THREADPOOLCTL_MIN_VERSION),
#'threadpoolctl>={}'.format(THREADPOOLCTL_MIN_VERSION),
'pandas>={}'.format(PANDAS_MIN_VERSION),
'querier>={}'.format(QUERIER_MIN_VERSION)
#'querier>={}'.format(QUERIER_MIN_VERSION)
]

install_jax_requires = [
'jax>={}'.format(JAX_MIN_VERSION),
'jaxlib>={}'.format(JAXLIB_MIN_VERSION)
] if platform.system() in ('Linux', 'Darwin') else []

other_requirements = ["tqdm==4.48.1", "pymongo >= 3.10.1", "SQLAlchemy >= 1.3.18"]
other_requirements = ["tqdm==4.48.1",
#"pymongo >= 3.10.1",
#"SQLAlchemy >= 1.3.18"
]

install_requires = [item for sublist in [install_jax_requires, install_all_requires, other_requirements] for item in sublist]

Expand All @@ -264,7 +267,9 @@ def setup_package():
cmdclass=cmdclass,
python_requires=">=3.5",
install_requires=install_requires,
setup_requires=["numpy>= 1.13.0"],
package_data={'': ['*.pxd']},
packages=find_packages(),
**extra_setuptools_args)

if len(sys.argv) == 1 or (
Expand Down

0 comments on commit 4fc2eb5

Please sign in to comment.