diff --git a/src/DotNetBridge/MessageValidator.cs b/src/DotNetBridge/MessageValidator.cs index 4243a45d..1ebcae67 100644 --- a/src/DotNetBridge/MessageValidator.cs +++ b/src/DotNetBridge/MessageValidator.cs @@ -195,7 +195,7 @@ public sealed class MessageValidator { return "Failed to initialize CUDA runtime. Possible reasons:" + "\n" + @"1. The machine does not have CUDA-capable card. Supported devices have compute capability 2.0 and higher." + "\n" + - @"2. Outdated graphics drivers. Please install the latest drivers from http://www.nvidia.com/Drivers ." + "\n" + + @"2. Outdated graphics drivers. Please install the latest drivers from https://www.nvidia.com/Download/index.aspx?lang=en-us ." + "\n" + @"3. CUDA runtime DLLs are missing, please see the GPU acceleration help for the installation instructions."; } ) diff --git a/src/NativeBridge/DataViewInterop.h b/src/NativeBridge/DataViewInterop.h index c764b285..9d9f19f1 100644 --- a/src/NativeBridge/DataViewInterop.h +++ b/src/NativeBridge/DataViewInterop.h @@ -16,7 +16,7 @@ typedef MANAGED_CALLBACK_PTR(bool, GETLABELS)(DataSourceBlock *source, int col, // REVIEW: boost_python is not updated at the same speed as swig or pybind11. // Both have a larger audience now, see about pybind11 https://github.com/davisking/dlib/issues/293 -// It handles csr_matrix: http://pybind11-rtdtest.readthedocs.io/en/stable/advanced.html#transparent-conversion-of-dense-and-sparse-eigen-data-types. +// It handles csr_matrix: https://pybind11-rtdtest.readthedocs.io/en/stable/advanced.html#transparent-conversion-of-dense-and-sparse-eigen-data-types. using namespace boost::python; // The data source wrapper used for managed interop. Some of the fields of this are visible to managed code. diff --git a/src/python/docs/docstrings/AveragedPerceptronBinaryClassifier.txt b/src/python/docs/docstrings/AveragedPerceptronBinaryClassifier.txt index 45b12209..31314605 100644 --- a/src/python/docs/docstrings/AveragedPerceptronBinaryClassifier.txt +++ b/src/python/docs/docstrings/AveragedPerceptronBinaryClassifier.txt @@ -45,10 +45,10 @@ `_ `Large Margin Classification Using the Perceptron Algorithm - `_ + `_ `Discriminative Training Methods for Hidden Markov Models - `_ + `_ :param loss: The default is :py:class:`'hinge' `. Other diff --git a/src/python/docs/docstrings/FactorizationMachineBinaryClassifier.txt b/src/python/docs/docstrings/FactorizationMachineBinaryClassifier.txt index 787972a2..c8e86ac9 100644 --- a/src/python/docs/docstrings/FactorizationMachineBinaryClassifier.txt +++ b/src/python/docs/docstrings/FactorizationMachineBinaryClassifier.txt @@ -22,7 +22,7 @@ `Field Aware Factorization Machines `_, `Field-aware Factorization Machines for CTR Prediction - `_, + `_, `Adaptive Subgradient Methods for Online Learning and Stochastic Optimization `_ diff --git a/src/python/docs/docstrings/FastForestBinaryClassifier.txt b/src/python/docs/docstrings/FastForestBinaryClassifier.txt index 6ebc1938..3e9a6688 100644 --- a/src/python/docs/docstrings/FastForestBinaryClassifier.txt +++ b/src/python/docs/docstrings/FastForestBinaryClassifier.txt @@ -33,7 +33,7 @@ **Reference** `Wikipedia: Random forest - `_ + `_ `Quantile regression forest `_ diff --git a/src/python/docs/docstrings/FastForestRegressor.txt b/src/python/docs/docstrings/FastForestRegressor.txt index 0d01ad8c..35a6ad5e 100644 --- a/src/python/docs/docstrings/FastForestRegressor.txt +++ b/src/python/docs/docstrings/FastForestRegressor.txt @@ -43,7 +43,7 @@ **Reference** `Wikipedia: Random forest - `_ + `_ `Quantile regression forest `_ diff --git a/src/python/docs/docstrings/FastLinearBinaryClassifier.txt b/src/python/docs/docstrings/FastLinearBinaryClassifier.txt index db2c74db..c956db97 100644 --- a/src/python/docs/docstrings/FastLinearBinaryClassifier.txt +++ b/src/python/docs/docstrings/FastLinearBinaryClassifier.txt @@ -58,8 +58,7 @@ content/uploads/2016/06/main-3.pdf>`_ `Stochastic Dual Coordinate Ascent Methods for Regularized Loss - Minimization `_ + Minimization `_ :param loss: The default is :py:class:`'log' `. Other diff --git a/src/python/docs/docstrings/FastLinearClassifier.txt b/src/python/docs/docstrings/FastLinearClassifier.txt index 2fcb2868..32a3d35d 100644 --- a/src/python/docs/docstrings/FastLinearClassifier.txt +++ b/src/python/docs/docstrings/FastLinearClassifier.txt @@ -56,8 +56,7 @@ content/uploads/2016/06/main-3.pdf>`_ `Stochastic Dual Coordinate Ascent Methods for Regularized Loss - Minimization `_ + Minimization `_ :param feature: see `Columns `_. diff --git a/src/python/docs/docstrings/FastLinearRegressor.txt b/src/python/docs/docstrings/FastLinearRegressor.txt index 4dda71be..080008fa 100644 --- a/src/python/docs/docstrings/FastLinearRegressor.txt +++ b/src/python/docs/docstrings/FastLinearRegressor.txt @@ -56,8 +56,7 @@ content/uploads/2016/06/main-3.pdf>`_ `Stochastic Dual Coordinate Ascent Methods for Regularized Loss - Minimization `_ + Minimization `_ :param loss: The only supported loss is :py:class:`'squared' diff --git a/src/python/docs/docstrings/FastTreesBinaryClassifier.txt b/src/python/docs/docstrings/FastTreesBinaryClassifier.txt index 1789d738..15865149 100644 --- a/src/python/docs/docstrings/FastTreesBinaryClassifier.txt +++ b/src/python/docs/docstrings/FastTreesBinaryClassifier.txt @@ -57,7 +57,7 @@ `_ `Greedy function approximation: A gradient boosting machine. - `_ + `_ :param optimizer: Default is ``sgd``. diff --git a/src/python/docs/docstrings/FastTreesRegressor.txt b/src/python/docs/docstrings/FastTreesRegressor.txt index cd1f76b8..91a3622d 100644 --- a/src/python/docs/docstrings/FastTreesRegressor.txt +++ b/src/python/docs/docstrings/FastTreesRegressor.txt @@ -62,7 +62,7 @@ `_ `Greedy function approximation: A gradient boosting machine. - `_ + `_ :param optimizer: Default is ``sgd``. diff --git a/src/python/docs/docstrings/FastTreesTweedieRegressor.txt b/src/python/docs/docstrings/FastTreesTweedieRegressor.txt index 76cd6749..3c02e645 100644 --- a/src/python/docs/docstrings/FastTreesTweedieRegressor.txt +++ b/src/python/docs/docstrings/FastTreesTweedieRegressor.txt @@ -14,7 +14,7 @@ `_ `Greedy function approximation: A gradient boosting machine. - `_ + `_ :param optimizer: Default is ``sgd``. diff --git a/src/python/docs/docstrings/GamBinaryClassifier.txt b/src/python/docs/docstrings/GamBinaryClassifier.txt index 69484156..acd5f023 100644 --- a/src/python/docs/docstrings/GamBinaryClassifier.txt +++ b/src/python/docs/docstrings/GamBinaryClassifier.txt @@ -21,7 +21,7 @@ functions learned will step between the discretization boundaries. This implementation is based on the this `paper - `_, + `_, but diverges from it in several important respects: most significantly, in each round of boosting, rather than do one feature at a time, it @@ -57,7 +57,7 @@ `Generalized additive models `_, `Intelligible Models for Classification and Regression - `_ + `_ :param normalize: Specifies the type of automatic normalization used: diff --git a/src/python/docs/docstrings/GamRegressor.txt b/src/python/docs/docstrings/GamRegressor.txt index 54d71d10..3e44a736 100644 --- a/src/python/docs/docstrings/GamRegressor.txt +++ b/src/python/docs/docstrings/GamRegressor.txt @@ -21,7 +21,7 @@ functions learned will step between the discretization boundaries. This implementation is based on the this `paper - `_, + `_, but diverges from it in several important respects: most significantly, in each round of boosting, rather than do one feature at a time, it @@ -57,7 +57,7 @@ `Generalized additive models `_, `Intelligible Models for Classification and Regression - `_ + `_ :param normalize: Specifies the type of automatic normalization used: diff --git a/src/python/docs/docstrings/LightLda.txt b/src/python/docs/docstrings/LightLda.txt index 95736da9..aaec0162 100644 --- a/src/python/docs/docstrings/LightLda.txt +++ b/src/python/docs/docstrings/LightLda.txt @@ -10,7 +10,7 @@ topical vectors. LightLDA is an extremely efficient implementation of LDA developed in MSR-Asia that incorporates a number of optimization techniques - `(http://arxiv.org/abs/1412.1576) `_. + `(https://arxiv.org/abs/1412.1576) `_. With the LDA transform, we can train a topic model to produce 1 million topics with 1 million vocabulary on a 1-billion-token document set one diff --git a/src/python/docs/docstrings/LocalDeepSvmBinaryClassifier.txt b/src/python/docs/docstrings/LocalDeepSvmBinaryClassifier.txt index cf028dcd..4863237a 100644 --- a/src/python/docs/docstrings/LocalDeepSvmBinaryClassifier.txt +++ b/src/python/docs/docstrings/LocalDeepSvmBinaryClassifier.txt @@ -39,14 +39,14 @@ More details about LD-SVM can be found in this paper `Local deep kernel learning for efficient non-linear SVM prediction - `_. **Reference** `Local deep kernel learning for efficient non-linear SVM prediction - `_ diff --git a/src/python/docs/docstrings/LogisticRegressionBinaryClassifier.txt b/src/python/docs/docstrings/LogisticRegressionBinaryClassifier.txt index 6fb1063d..b268dea2 100644 --- a/src/python/docs/docstrings/LogisticRegressionBinaryClassifier.txt +++ b/src/python/docs/docstrings/LogisticRegressionBinaryClassifier.txt @@ -69,14 +69,14 @@ **Reference** - `Wikipedia: L-BFGS `_ + `Wikipedia: L-BFGS `_ `Wikipedia: Logistic - regression `_ + regression `_ `Scalable Training of L1-Regularized Log-Linear Models - `_ + `_ `Test Run - L1 and L2 Regularization for Machine Learning diff --git a/src/python/docs/docstrings/LogisticRegressionClassifier.txt b/src/python/docs/docstrings/LogisticRegressionClassifier.txt index db6f386e..405c20f3 100644 --- a/src/python/docs/docstrings/LogisticRegressionClassifier.txt +++ b/src/python/docs/docstrings/LogisticRegressionClassifier.txt @@ -70,14 +70,14 @@ **Reference** - `Wikipedia: L-BFGS `_ + `Wikipedia: L-BFGS `_ `Wikipedia: Logistic - regression `_ + regression `_ `Scalable Training of L1-Regularized Log-Linear Models - `_ + `_ `Test Run - L1 and L2 Regularization for Machine Learning diff --git a/src/python/docs/docstrings/OneClassSVMAnomalyDetector.txt b/src/python/docs/docstrings/OneClassSVMAnomalyDetector.txt index 958bd389..44e9ef30 100644 --- a/src/python/docs/docstrings/OneClassSVMAnomalyDetector.txt +++ b/src/python/docs/docstrings/OneClassSVMAnomalyDetector.txt @@ -29,10 +29,10 @@ us/library/azure/dn913103.aspx>`_ `Estimating the Support of a High-Dimensional Distribution - `_ + `_ `New Support Vector Algorithms - `_ + `_ `LIBSVM: A Library for Support Vector Machines `_ diff --git a/src/python/docs/docstrings/PcaAnomalyDetector.txt b/src/python/docs/docstrings/PcaAnomalyDetector.txt index 5896c5c9..f51aaf24 100644 --- a/src/python/docs/docstrings/PcaAnomalyDetector.txt +++ b/src/python/docs/docstrings/PcaAnomalyDetector.txt @@ -36,13 +36,12 @@ `Randomized Methods for Computing the Singular Value Decomposition (SVD) of very large matrices - `_ + `_ `A randomized algorithm for principal component analysis `_, `Finding Structure with Randomness: Probabilistic Algorithms for Constructing Approximate Matrix Decompositions - `_ + `_ :param feature: see `Columns `_. diff --git a/src/python/docs/docstrings/SgdBinaryClassifier.txt b/src/python/docs/docstrings/SgdBinaryClassifier.txt index c1ed86ac..a585e088 100644 --- a/src/python/docs/docstrings/SgdBinaryClassifier.txt +++ b/src/python/docs/docstrings/SgdBinaryClassifier.txt @@ -13,14 +13,14 @@ associated optimization problem is sparse, then Hogwild SGD achieves a nearly optimal rate of convergence. For a detailed reference, please - refer to `http://arxiv.org/pdf/1106.5730v2.pdf - `_. + refer to `https://arxiv.org/pdf/1106.5730v2.pdf + `_. **Reference** - `http://arxiv.org/pdf/1106.5730v2.pdf - `_ + `https://arxiv.org/pdf/1106.5730v2.pdf + `_ :param normalize: Specifies the type of automatic normalization used: diff --git a/src/python/docs/docstrings/SigmoidKernel.txt b/src/python/docs/docstrings/SigmoidKernel.txt index 3a22d2cd..62c5785a 100644 --- a/src/python/docs/docstrings/SigmoidKernel.txt +++ b/src/python/docs/docstrings/SigmoidKernel.txt @@ -3,8 +3,7 @@ Apply sigmoid function. tanh(gamma*+c). .. remarks:: - `SigmoidKernel `_ is a + `SigmoidKernel `_ is a kernel function that computes the similarity between two features. diff --git a/src/python/docs/docstrings/SsaForecaster.txt b/src/python/docs/docstrings/SsaForecaster.txt index 8873702b..a8a99a6f 100644 --- a/src/python/docs/docstrings/SsaForecaster.txt +++ b/src/python/docs/docstrings/SsaForecaster.txt @@ -11,7 +11,7 @@ input time-series where each component in the spectrum corresponds to a trend, seasonal or noise component in the time-series. For details of the Singular Spectrum Analysis (SSA), refer to `this document - `_. + `_. .. seealso:: :py:func:`IIDChangePointDetector diff --git a/src/python/docs/docstrings/SsweEmbedding.txt b/src/python/docs/docstrings/SsweEmbedding.txt index 4c476285..12a4c509 100644 --- a/src/python/docs/docstrings/SsweEmbedding.txt +++ b/src/python/docs/docstrings/SsweEmbedding.txt @@ -7,12 +7,12 @@ versions of `GloVe Models `_, `FastText `_, and `Sswe - `_. + `_. .. remarks:: Sentiment-specific word embedding (SSWE) is a DNN featurizer developed - by MSRA (`paper `_). + by MSRA (`paper `_). It incorporates sentiment information into the neural network to learn sentiment specific word embedding. It proves to be useful in various diff --git a/src/python/docs/docstrings/SupervisedBinner.txt b/src/python/docs/docstrings/SupervisedBinner.txt index 963a560e..95317b75 100644 --- a/src/python/docs/docstrings/SupervisedBinner.txt +++ b/src/python/docs/docstrings/SupervisedBinner.txt @@ -24,7 +24,7 @@ the default is to normalize features before training. ``SupervisedBinner`` implements the `Entropy-Based Discretization - `_. + `_. Partition of the data is performed recursively to select the split with highest entropy gain with respect to the label. Therefore, the final binned features will have high correlation with diff --git a/src/python/docs/docstrings/WordEmbedding.txt b/src/python/docs/docstrings/WordEmbedding.txt index 41d6f1c6..f19c73d8 100644 --- a/src/python/docs/docstrings/WordEmbedding.txt +++ b/src/python/docs/docstrings/WordEmbedding.txt @@ -10,7 +10,7 @@ available options are various versions of `GloVe Models `_, `FastText `_, and `Sswe - `_. + `_. :param model_kind: Pre-trained model used to create the vocabulary. diff --git a/src/python/docs/sphinx/ci_script/_static/mystyle.css b/src/python/docs/sphinx/ci_script/_static/mystyle.css index a69e381c..a5df3a24 100644 --- a/src/python/docs/sphinx/ci_script/_static/mystyle.css +++ b/src/python/docs/sphinx/ci_script/_static/mystyle.css @@ -8432,7 +8432,7 @@ label { padding: 0px; } /* Flexible box model classes */ -/* Taken from Alex Russell http://infrequently.org/2009/08/css-3-progress/ */ +/* Taken from Alex Russell https://infrequently.org/2009/08/css-3-progress/ */ /* This file is a compatability layer. It allows the usage of flexible box model layouts accross multiple browsers, including older browsers. The newest, universal implementation of the flexible box model is used when available (see diff --git a/src/python/docs/sphinx/ci_script/conf.py b/src/python/docs/sphinx/ci_script/conf.py index f96889d1..1acb3312 100644 --- a/src/python/docs/sphinx/ci_script/conf.py +++ b/src/python/docs/sphinx/ci_script/conf.py @@ -128,8 +128,8 @@ 'relative': True, 'reference_url': { 'nimbusml': None, - 'matplotlib': 'http://matplotlib.org', - 'numpy': 'http://www.numpy.org/', + 'matplotlib': 'https://matplotlib.org', + 'numpy': 'https://www.numpy.org/', 'scipy': 'https://www.scipy.org/'}, } diff --git a/src/python/docs/sphinx/conf.py b/src/python/docs/sphinx/conf.py index 1f0cccfc..9fb1d4ab 100644 --- a/src/python/docs/sphinx/conf.py +++ b/src/python/docs/sphinx/conf.py @@ -145,8 +145,8 @@ def install_and_import(package): 'relative': True, 'reference_url': { 'nimbusml': None, - 'matplotlib': 'http://matplotlib.org', - 'numpy': 'http://www.numpy.org/', + 'matplotlib': 'https://matplotlib.org', + 'numpy': 'https://www.numpy.org/', 'scipy': 'https://www.scipy.org/'}, } diff --git a/src/python/nimbusml.pyproj b/src/python/nimbusml.pyproj index af4cca04..c22c2a99 100644 --- a/src/python/nimbusml.pyproj +++ b/src/python/nimbusml.pyproj @@ -792,11 +792,13 @@ + + diff --git a/src/python/nimbusml/decomposition/factorizationmachinebinaryclassifier.py b/src/python/nimbusml/decomposition/factorizationmachinebinaryclassifier.py index fd3d75a2..4a8f6c44 100644 --- a/src/python/nimbusml/decomposition/factorizationmachinebinaryclassifier.py +++ b/src/python/nimbusml/decomposition/factorizationmachinebinaryclassifier.py @@ -44,7 +44,7 @@ class FactorizationMachineBinaryClassifier( `Field Aware Factorization Machines `_, `Field-aware Factorization Machines for CTR Prediction - `_, + `_, `Adaptive Subgradient Methods for Online Learning and Stochastic Optimization `_ diff --git a/src/python/nimbusml/decomposition/pcaanomalydetector.py b/src/python/nimbusml/decomposition/pcaanomalydetector.py index bdf42b22..85938224 100644 --- a/src/python/nimbusml/decomposition/pcaanomalydetector.py +++ b/src/python/nimbusml/decomposition/pcaanomalydetector.py @@ -57,13 +57,12 @@ class PcaAnomalyDetector(core, BasePredictor, ClassifierMixin): `Randomized Methods for Computing the Singular Value Decomposition (SVD) of very large matrices - `_ + `_ `A randomized algorithm for principal component analysis `_, `Finding Structure with Randomness: Probabilistic Algorithms for Constructing Approximate Matrix Decompositions - `_ + `_ :param feature: see `Columns `_. diff --git a/src/python/nimbusml/ensemble/fastforestbinaryclassifier.py b/src/python/nimbusml/ensemble/fastforestbinaryclassifier.py index ea911977..5e6d5bd9 100644 --- a/src/python/nimbusml/ensemble/fastforestbinaryclassifier.py +++ b/src/python/nimbusml/ensemble/fastforestbinaryclassifier.py @@ -55,7 +55,7 @@ class FastForestBinaryClassifier( **Reference** `Wikipedia: Random forest - `_ + `_ `Quantile regression forest `_ diff --git a/src/python/nimbusml/ensemble/fastforestregressor.py b/src/python/nimbusml/ensemble/fastforestregressor.py index 5a2affe4..cb20c847 100644 --- a/src/python/nimbusml/ensemble/fastforestregressor.py +++ b/src/python/nimbusml/ensemble/fastforestregressor.py @@ -64,7 +64,7 @@ class FastForestRegressor(core, BasePredictor, RegressorMixin): **Reference** `Wikipedia: Random forest - `_ + `_ `Quantile regression forest `_ diff --git a/src/python/nimbusml/ensemble/fasttreesbinaryclassifier.py b/src/python/nimbusml/ensemble/fasttreesbinaryclassifier.py index 8c12cb48..24f633fe 100644 --- a/src/python/nimbusml/ensemble/fasttreesbinaryclassifier.py +++ b/src/python/nimbusml/ensemble/fasttreesbinaryclassifier.py @@ -81,7 +81,7 @@ class FastTreesBinaryClassifier( `_ `Greedy function approximation: A gradient boosting machine. - `_ + `_ :param feature: see `Columns `_. diff --git a/src/python/nimbusml/ensemble/fasttreesregressor.py b/src/python/nimbusml/ensemble/fasttreesregressor.py index c3994230..12c8c59b 100644 --- a/src/python/nimbusml/ensemble/fasttreesregressor.py +++ b/src/python/nimbusml/ensemble/fasttreesregressor.py @@ -83,7 +83,7 @@ class FastTreesRegressor(core, BasePredictor, RegressorMixin): `_ `Greedy function approximation: A gradient boosting machine. - `_ + `_ :param feature: see `Columns `_. diff --git a/src/python/nimbusml/ensemble/fasttreestweedieregressor.py b/src/python/nimbusml/ensemble/fasttreestweedieregressor.py index 1db266b7..177d9ede 100644 --- a/src/python/nimbusml/ensemble/fasttreestweedieregressor.py +++ b/src/python/nimbusml/ensemble/fasttreestweedieregressor.py @@ -38,7 +38,7 @@ class FastTreesTweedieRegressor( `_ `Greedy function approximation: A gradient boosting machine. - `_ + `_ :param feature: see `Columns `_. diff --git a/src/python/nimbusml/ensemble/gambinaryclassifier.py b/src/python/nimbusml/ensemble/gambinaryclassifier.py index eb08e95c..79808610 100644 --- a/src/python/nimbusml/ensemble/gambinaryclassifier.py +++ b/src/python/nimbusml/ensemble/gambinaryclassifier.py @@ -42,7 +42,7 @@ class GamBinaryClassifier(core, BasePredictor, ClassifierMixin): functions learned will step between the discretization boundaries. This implementation is based on the this `paper - `_, + `_, but diverges from it in several important respects: most significantly, in each round of boosting, rather than do one feature at a time, it @@ -78,7 +78,7 @@ class GamBinaryClassifier(core, BasePredictor, ClassifierMixin): `Generalized additive models `_, `Intelligible Models for Classification and Regression - `_ + `_ :param feature: see `Columns `_. diff --git a/src/python/nimbusml/ensemble/gamregressor.py b/src/python/nimbusml/ensemble/gamregressor.py index c57ad499..45796805 100644 --- a/src/python/nimbusml/ensemble/gamregressor.py +++ b/src/python/nimbusml/ensemble/gamregressor.py @@ -41,7 +41,7 @@ class GamRegressor(core, BasePredictor, RegressorMixin): functions learned will step between the discretization boundaries. This implementation is based on the this `paper - `_, + `_, but diverges from it in several important respects: most significantly, in each round of boosting, rather than do one feature at a time, it @@ -77,7 +77,7 @@ class GamRegressor(core, BasePredictor, RegressorMixin): `Generalized additive models `_, `Intelligible Models for Classification and Regression - `_ + `_ :param feature: see `Columns `_. diff --git a/src/python/nimbusml/feature_extraction/text/lightlda.py b/src/python/nimbusml/feature_extraction/text/lightlda.py index 271f90c7..f8801caa 100644 --- a/src/python/nimbusml/feature_extraction/text/lightlda.py +++ b/src/python/nimbusml/feature_extraction/text/lightlda.py @@ -30,7 +30,7 @@ class LightLda(core, BaseTransform, TransformerMixin): topical vectors. LightLDA is an extremely efficient implementation of LDA developed in MSR-Asia that incorporates a number of optimization techniques - `(http://arxiv.org/abs/1412.1576) `_. + `(https://arxiv.org/abs/1412.1576) `_. With the LDA transform, we can train a topic model to produce 1 million topics with 1 million vocabulary on a 1-billion-token document set one diff --git a/src/python/nimbusml/feature_extraction/text/wordembedding.py b/src/python/nimbusml/feature_extraction/text/wordembedding.py index ad467ce1..957cf06d 100644 --- a/src/python/nimbusml/feature_extraction/text/wordembedding.py +++ b/src/python/nimbusml/feature_extraction/text/wordembedding.py @@ -31,7 +31,7 @@ class WordEmbedding(core, BaseTransform, TransformerMixin): available options are various versions of `GloVe Models `_, `FastText `_, and `Sswe - `_. + `_. :param columns: a dictionary of key-value pairs, where key is the output diff --git a/src/python/nimbusml/internal/core/decomposition/factorizationmachinebinaryclassifier.py b/src/python/nimbusml/internal/core/decomposition/factorizationmachinebinaryclassifier.py index c54f353b..bdc0a7d2 100644 --- a/src/python/nimbusml/internal/core/decomposition/factorizationmachinebinaryclassifier.py +++ b/src/python/nimbusml/internal/core/decomposition/factorizationmachinebinaryclassifier.py @@ -42,7 +42,7 @@ class FactorizationMachineBinaryClassifier( `Field Aware Factorization Machines `_, `Field-aware Factorization Machines for CTR Prediction - `_, + `_, `Adaptive Subgradient Methods for Online Learning and Stochastic Optimization `_ diff --git a/src/python/nimbusml/internal/core/decomposition/pcaanomalydetector.py b/src/python/nimbusml/internal/core/decomposition/pcaanomalydetector.py index 728a7132..9fe01d4f 100644 --- a/src/python/nimbusml/internal/core/decomposition/pcaanomalydetector.py +++ b/src/python/nimbusml/internal/core/decomposition/pcaanomalydetector.py @@ -57,13 +57,12 @@ class PcaAnomalyDetector( `Randomized Methods for Computing the Singular Value Decomposition (SVD) of very large matrices - `_ + `_ `A randomized algorithm for principal component analysis `_, `Finding Structure with Randomness: Probabilistic Algorithms for Constructing Approximate Matrix Decompositions - `_ + `_ :param normalize: Specifies the type of automatic normalization used: diff --git a/src/python/nimbusml/internal/core/ensemble/fastforestbinaryclassifier.py b/src/python/nimbusml/internal/core/ensemble/fastforestbinaryclassifier.py index 270584a3..715c2035 100644 --- a/src/python/nimbusml/internal/core/ensemble/fastforestbinaryclassifier.py +++ b/src/python/nimbusml/internal/core/ensemble/fastforestbinaryclassifier.py @@ -54,7 +54,7 @@ class FastForestBinaryClassifier( **Reference** `Wikipedia: Random forest - `_ + `_ `Quantile regression forest `_ diff --git a/src/python/nimbusml/internal/core/ensemble/fastforestregressor.py b/src/python/nimbusml/internal/core/ensemble/fastforestregressor.py index 74698a6d..37278659 100644 --- a/src/python/nimbusml/internal/core/ensemble/fastforestregressor.py +++ b/src/python/nimbusml/internal/core/ensemble/fastforestregressor.py @@ -64,7 +64,7 @@ class FastForestRegressor( **Reference** `Wikipedia: Random forest - `_ + `_ `Quantile regression forest `_ diff --git a/src/python/nimbusml/internal/core/ensemble/fasttreesbinaryclassifier.py b/src/python/nimbusml/internal/core/ensemble/fasttreesbinaryclassifier.py index 37e5cd76..eef52d67 100644 --- a/src/python/nimbusml/internal/core/ensemble/fasttreesbinaryclassifier.py +++ b/src/python/nimbusml/internal/core/ensemble/fasttreesbinaryclassifier.py @@ -78,7 +78,7 @@ class FastTreesBinaryClassifier( `_ `Greedy function approximation: A gradient boosting machine. - `_ + `_ :param number_of_trees: Specifies the total number of decision trees to create in the ensemble. By creating more decision trees, you can diff --git a/src/python/nimbusml/internal/core/ensemble/fasttreesregressor.py b/src/python/nimbusml/internal/core/ensemble/fasttreesregressor.py index 3ee724c4..25becac7 100644 --- a/src/python/nimbusml/internal/core/ensemble/fasttreesregressor.py +++ b/src/python/nimbusml/internal/core/ensemble/fasttreesregressor.py @@ -83,7 +83,7 @@ class FastTreesRegressor( `_ `Greedy function approximation: A gradient boosting machine. - `_ + `_ :param number_of_trees: Specifies the total number of decision trees to create in the ensemble. By creating more decision trees, you can diff --git a/src/python/nimbusml/internal/core/ensemble/fasttreestweedieregressor.py b/src/python/nimbusml/internal/core/ensemble/fasttreestweedieregressor.py index f9340f5d..75a15169 100644 --- a/src/python/nimbusml/internal/core/ensemble/fasttreestweedieregressor.py +++ b/src/python/nimbusml/internal/core/ensemble/fasttreestweedieregressor.py @@ -35,7 +35,7 @@ class FastTreesTweedieRegressor( `_ `Greedy function approximation: A gradient boosting machine. - `_ + `_ :param number_of_trees: Specifies the total number of decision trees to create in the ensemble. By creating more decision trees, you can diff --git a/src/python/nimbusml/internal/core/ensemble/gambinaryclassifier.py b/src/python/nimbusml/internal/core/ensemble/gambinaryclassifier.py index 56d90d7e..52f2f565 100644 --- a/src/python/nimbusml/internal/core/ensemble/gambinaryclassifier.py +++ b/src/python/nimbusml/internal/core/ensemble/gambinaryclassifier.py @@ -42,7 +42,7 @@ class GamBinaryClassifier( functions learned will step between the discretization boundaries. This implementation is based on the this `paper - `_, + `_, but diverges from it in several important respects: most significantly, in each round of boosting, rather than do one feature at a time, it @@ -78,7 +78,7 @@ class GamBinaryClassifier( `Generalized additive models `_, `Intelligible Models for Classification and Regression - `_ + `_ :param number_of_iterations: Total number of iterations over all features. diff --git a/src/python/nimbusml/internal/core/ensemble/gamregressor.py b/src/python/nimbusml/internal/core/ensemble/gamregressor.py index 048bf874..de884d9a 100644 --- a/src/python/nimbusml/internal/core/ensemble/gamregressor.py +++ b/src/python/nimbusml/internal/core/ensemble/gamregressor.py @@ -40,7 +40,7 @@ class GamRegressor(BasePipelineItem, DefaultSignatureWithRoles): functions learned will step between the discretization boundaries. This implementation is based on the this `paper - `_, + `_, but diverges from it in several important respects: most significantly, in each round of boosting, rather than do one feature at a time, it @@ -76,7 +76,7 @@ class GamRegressor(BasePipelineItem, DefaultSignatureWithRoles): `Generalized additive models `_, `Intelligible Models for Classification and Regression - `_ + `_ :param number_of_iterations: Total number of iterations over all features. diff --git a/src/python/nimbusml/internal/core/feature_extraction/text/lightlda.py b/src/python/nimbusml/internal/core/feature_extraction/text/lightlda.py index 45743c1b..8fbcc6e5 100644 --- a/src/python/nimbusml/internal/core/feature_extraction/text/lightlda.py +++ b/src/python/nimbusml/internal/core/feature_extraction/text/lightlda.py @@ -28,7 +28,7 @@ class LightLda(BasePipelineItem, DefaultSignature): topical vectors. LightLDA is an extremely efficient implementation of LDA developed in MSR-Asia that incorporates a number of optimization techniques - `(http://arxiv.org/abs/1412.1576) `_. + `(https://arxiv.org/abs/1412.1576) `_. With the LDA transform, we can train a topic model to produce 1 million topics with 1 million vocabulary on a 1-billion-token document set one diff --git a/src/python/nimbusml/internal/core/feature_extraction/text/wordembedding.py b/src/python/nimbusml/internal/core/feature_extraction/text/wordembedding.py index d67df9db..45553249 100644 --- a/src/python/nimbusml/internal/core/feature_extraction/text/wordembedding.py +++ b/src/python/nimbusml/internal/core/feature_extraction/text/wordembedding.py @@ -28,7 +28,7 @@ class WordEmbedding(BasePipelineItem, DefaultSignature): available options are various versions of `GloVe Models `_, `FastText `_, and `Sswe - `_. + `_. :param model_kind: Pre-trained model used to create the vocabulary. diff --git a/src/python/nimbusml/internal/core/linear_model/averagedperceptronbinaryclassifier.py b/src/python/nimbusml/internal/core/linear_model/averagedperceptronbinaryclassifier.py index 26471467..67f10cfc 100644 --- a/src/python/nimbusml/internal/core/linear_model/averagedperceptronbinaryclassifier.py +++ b/src/python/nimbusml/internal/core/linear_model/averagedperceptronbinaryclassifier.py @@ -67,10 +67,10 @@ class AveragedPerceptronBinaryClassifier( `_ `Large Margin Classification Using the Perceptron Algorithm - `_ + `_ `Discriminative Training Methods for Hidden Markov Models - `_ + `_ :param normalize: Specifies the type of automatic normalization used: diff --git a/src/python/nimbusml/internal/core/linear_model/fastlinearbinaryclassifier.py b/src/python/nimbusml/internal/core/linear_model/fastlinearbinaryclassifier.py index 10c5c2a5..3d43894a 100644 --- a/src/python/nimbusml/internal/core/linear_model/fastlinearbinaryclassifier.py +++ b/src/python/nimbusml/internal/core/linear_model/fastlinearbinaryclassifier.py @@ -80,8 +80,7 @@ class FastLinearBinaryClassifier( content/uploads/2016/06/main-3.pdf>`_ `Stochastic Dual Coordinate Ascent Methods for Regularized Loss - Minimization `_ + Minimization `_ :param l2_regularization: L2 regularizer constant. By default the l2 diff --git a/src/python/nimbusml/internal/core/linear_model/fastlinearclassifier.py b/src/python/nimbusml/internal/core/linear_model/fastlinearclassifier.py index a2880b79..b377eb11 100644 --- a/src/python/nimbusml/internal/core/linear_model/fastlinearclassifier.py +++ b/src/python/nimbusml/internal/core/linear_model/fastlinearclassifier.py @@ -78,8 +78,7 @@ class FastLinearClassifier( content/uploads/2016/06/main-3.pdf>`_ `Stochastic Dual Coordinate Ascent Methods for Regularized Loss - Minimization `_ + Minimization `_ :param l2_regularization: L2 regularizer constant. By default the l2 diff --git a/src/python/nimbusml/internal/core/linear_model/fastlinearregressor.py b/src/python/nimbusml/internal/core/linear_model/fastlinearregressor.py index cf9073e5..f8a4b5ac 100644 --- a/src/python/nimbusml/internal/core/linear_model/fastlinearregressor.py +++ b/src/python/nimbusml/internal/core/linear_model/fastlinearregressor.py @@ -78,8 +78,7 @@ class FastLinearRegressor( content/uploads/2016/06/main-3.pdf>`_ `Stochastic Dual Coordinate Ascent Methods for Regularized Loss - Minimization `_ + Minimization `_ :param l2_regularization: L2 regularizer constant. By default the l2 diff --git a/src/python/nimbusml/internal/core/linear_model/logisticregressionbinaryclassifier.py b/src/python/nimbusml/internal/core/linear_model/logisticregressionbinaryclassifier.py index 098c92e9..50b344ac 100644 --- a/src/python/nimbusml/internal/core/linear_model/logisticregressionbinaryclassifier.py +++ b/src/python/nimbusml/internal/core/linear_model/logisticregressionbinaryclassifier.py @@ -90,14 +90,14 @@ class LogisticRegressionBinaryClassifier( **Reference** - `Wikipedia: L-BFGS `_ + `Wikipedia: L-BFGS `_ `Wikipedia: Logistic - regression `_ + regression `_ `Scalable Training of L1-Regularized Log-Linear Models - `_ + `_ `Test Run - L1 and L2 Regularization for Machine Learning diff --git a/src/python/nimbusml/internal/core/linear_model/logisticregressionclassifier.py b/src/python/nimbusml/internal/core/linear_model/logisticregressionclassifier.py index 90af2ffb..3fd6efba 100644 --- a/src/python/nimbusml/internal/core/linear_model/logisticregressionclassifier.py +++ b/src/python/nimbusml/internal/core/linear_model/logisticregressionclassifier.py @@ -91,14 +91,14 @@ class LogisticRegressionClassifier( **Reference** - `Wikipedia: L-BFGS `_ + `Wikipedia: L-BFGS `_ `Wikipedia: Logistic - regression `_ + regression `_ `Scalable Training of L1-Regularized Log-Linear Models - `_ + `_ `Test Run - L1 and L2 Regularization for Machine Learning diff --git a/src/python/nimbusml/internal/core/linear_model/sgdbinaryclassifier.py b/src/python/nimbusml/internal/core/linear_model/sgdbinaryclassifier.py index b0c5e898..aada6337 100644 --- a/src/python/nimbusml/internal/core/linear_model/sgdbinaryclassifier.py +++ b/src/python/nimbusml/internal/core/linear_model/sgdbinaryclassifier.py @@ -35,14 +35,14 @@ class SgdBinaryClassifier( associated optimization problem is sparse, then Hogwild SGD achieves a nearly optimal rate of convergence. For a detailed reference, please - refer to `http://arxiv.org/pdf/1106.5730v2.pdf - `_. + refer to `https://arxiv.org/pdf/1106.5730v2.pdf + `_. **Reference** - `http://arxiv.org/pdf/1106.5730v2.pdf - `_ + `https://arxiv.org/pdf/1106.5730v2.pdf + `_ :param normalize: Specifies the type of automatic normalization used: diff --git a/src/python/nimbusml/internal/core/timeseries/ssaforecaster.py b/src/python/nimbusml/internal/core/timeseries/ssaforecaster.py index da39b2c8..ce9064b5 100644 --- a/src/python/nimbusml/internal/core/timeseries/ssaforecaster.py +++ b/src/python/nimbusml/internal/core/timeseries/ssaforecaster.py @@ -30,7 +30,7 @@ class SsaForecaster(BasePipelineItem, DefaultSignature): input time-series where each component in the spectrum corresponds to a trend, seasonal or noise component in the time-series. For details of the Singular Spectrum Analysis (SSA), refer to `this document - `_. + `_. :param window_size: The length of the window on the series for building the trajectory matrix (parameter L). diff --git a/src/python/nimbusml/linear_model/averagedperceptronbinaryclassifier.py b/src/python/nimbusml/linear_model/averagedperceptronbinaryclassifier.py index 0b467a37..3825c9e0 100644 --- a/src/python/nimbusml/linear_model/averagedperceptronbinaryclassifier.py +++ b/src/python/nimbusml/linear_model/averagedperceptronbinaryclassifier.py @@ -67,10 +67,10 @@ class AveragedPerceptronBinaryClassifier( `_ `Large Margin Classification Using the Perceptron Algorithm - `_ + `_ `Discriminative Training Methods for Hidden Markov Models - `_ + `_ :param feature: see `Columns `_. diff --git a/src/python/nimbusml/linear_model/fastlinearbinaryclassifier.py b/src/python/nimbusml/linear_model/fastlinearbinaryclassifier.py index 4758454b..6b99d165 100644 --- a/src/python/nimbusml/linear_model/fastlinearbinaryclassifier.py +++ b/src/python/nimbusml/linear_model/fastlinearbinaryclassifier.py @@ -80,8 +80,7 @@ class FastLinearBinaryClassifier( content/uploads/2016/06/main-3.pdf>`_ `Stochastic Dual Coordinate Ascent Methods for Regularized Loss - Minimization `_ + Minimization `_ :param feature: see `Columns `_. diff --git a/src/python/nimbusml/linear_model/fastlinearclassifier.py b/src/python/nimbusml/linear_model/fastlinearclassifier.py index d1ef7644..58e26ed3 100644 --- a/src/python/nimbusml/linear_model/fastlinearclassifier.py +++ b/src/python/nimbusml/linear_model/fastlinearclassifier.py @@ -77,8 +77,7 @@ class FastLinearClassifier(core, BasePredictor, ClassifierMixin): content/uploads/2016/06/main-3.pdf>`_ `Stochastic Dual Coordinate Ascent Methods for Regularized Loss - Minimization `_ + Minimization `_ :param feature: see `Columns `_. diff --git a/src/python/nimbusml/linear_model/fastlinearregressor.py b/src/python/nimbusml/linear_model/fastlinearregressor.py index 766a79ae..7baba2f6 100644 --- a/src/python/nimbusml/linear_model/fastlinearregressor.py +++ b/src/python/nimbusml/linear_model/fastlinearregressor.py @@ -77,8 +77,7 @@ class FastLinearRegressor(core, BasePredictor, RegressorMixin): content/uploads/2016/06/main-3.pdf>`_ `Stochastic Dual Coordinate Ascent Methods for Regularized Loss - Minimization `_ + Minimization `_ :param feature: see `Columns `_. diff --git a/src/python/nimbusml/linear_model/logisticregressionbinaryclassifier.py b/src/python/nimbusml/linear_model/logisticregressionbinaryclassifier.py index 1cf29de4..76410659 100644 --- a/src/python/nimbusml/linear_model/logisticregressionbinaryclassifier.py +++ b/src/python/nimbusml/linear_model/logisticregressionbinaryclassifier.py @@ -91,14 +91,14 @@ class LogisticRegressionBinaryClassifier( **Reference** - `Wikipedia: L-BFGS `_ + `Wikipedia: L-BFGS `_ `Wikipedia: Logistic - regression `_ + regression `_ `Scalable Training of L1-Regularized Log-Linear Models - `_ + `_ `Test Run - L1 and L2 Regularization for Machine Learning diff --git a/src/python/nimbusml/linear_model/logisticregressionclassifier.py b/src/python/nimbusml/linear_model/logisticregressionclassifier.py index 265adc10..9155799e 100644 --- a/src/python/nimbusml/linear_model/logisticregressionclassifier.py +++ b/src/python/nimbusml/linear_model/logisticregressionclassifier.py @@ -92,14 +92,14 @@ class LogisticRegressionClassifier( **Reference** - `Wikipedia: L-BFGS `_ + `Wikipedia: L-BFGS `_ `Wikipedia: Logistic - regression `_ + regression `_ `Scalable Training of L1-Regularized Log-Linear Models - `_ + `_ `Test Run - L1 and L2 Regularization for Machine Learning diff --git a/src/python/nimbusml/linear_model/sgdbinaryclassifier.py b/src/python/nimbusml/linear_model/sgdbinaryclassifier.py index a5ee573d..893f6465 100644 --- a/src/python/nimbusml/linear_model/sgdbinaryclassifier.py +++ b/src/python/nimbusml/linear_model/sgdbinaryclassifier.py @@ -34,14 +34,14 @@ class SgdBinaryClassifier(core, BasePredictor, ClassifierMixin): associated optimization problem is sparse, then Hogwild SGD achieves a nearly optimal rate of convergence. For a detailed reference, please - refer to `http://arxiv.org/pdf/1106.5730v2.pdf - `_. + refer to `https://arxiv.org/pdf/1106.5730v2.pdf + `_. **Reference** - `http://arxiv.org/pdf/1106.5730v2.pdf - `_ + `https://arxiv.org/pdf/1106.5730v2.pdf + `_ :param feature: see `Columns `_. diff --git a/src/python/nimbusml/timeseries/ssaforecaster.py b/src/python/nimbusml/timeseries/ssaforecaster.py index dd7e0296..3cbe540f 100644 --- a/src/python/nimbusml/timeseries/ssaforecaster.py +++ b/src/python/nimbusml/timeseries/ssaforecaster.py @@ -31,7 +31,7 @@ class SsaForecaster(core, BaseTransform, TransformerMixin): input time-series where each component in the spectrum corresponds to a trend, seasonal or noise component in the time-series. For details of the Singular Spectrum Analysis (SSA), refer to `this document - `_. + `_. :param columns: see `Columns `_. diff --git a/src/python/setup.py b/src/python/setup.py index fc350275..a200d2fd 100644 --- a/src/python/setup.py +++ b/src/python/setup.py @@ -148,7 +148,7 @@ # Although 'package_data' is the preferred approach, in some case # you may need to place data files outside of your packages. See: - # http://docs.python.org/3.4/distutils/setupscript.html#installing + # https://docs.python.org/3.4/distutils/setupscript.html#installing # -additional-files # noqa # In this case, 'data_file' will be installed into # '/my_data' diff --git a/src/python/setup.py.in b/src/python/setup.py.in index e65db7d8..b4371c29 100644 --- a/src/python/setup.py.in +++ b/src/python/setup.py.in @@ -148,7 +148,7 @@ setup( # Although 'package_data' is the preferred approach, in some case # you may need to place data files outside of your packages. See: - # http://docs.python.org/3.4/distutils/setupscript.html#installing + # https://docs.python.org/3.4/distutils/setupscript.html#installing # -additional-files # noqa # In this case, 'data_file' will be installed into # '/my_data' diff --git a/src/python/tools/changeHttpURLsToHttps.py b/src/python/tools/changeHttpURLsToHttps.py new file mode 100644 index 00000000..d7202777 --- /dev/null +++ b/src/python/tools/changeHttpURLsToHttps.py @@ -0,0 +1,45 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# ------------------------------------------------------------------------- + +# Converts all valid HTTP links to HTTPS, where the fed +# HTTP links are found in Report_AlterableUrls_FindHttpURLs.csv, which +# is generated by FindHttpURLs.py +# usage: python3 changeHttpURLsToHttps.py [PATH_TO_Report_FindHttpURLs.txt] [PATH_TO_ROOT_OF_NIMBUSML_DIRECTORY] +# output: Report_ReplaceHttpsURLs.txt + +import sys +import os +import csv + +def changeUrls(pathToReportCsv, pathToRootDirectory): + with open(pathToReportCsv, newline='') as csvFile: + csv_reader = csv.reader(csvFile, delimiter='\t') + line_count = 0 + for row in csv_reader: + if line_count == 0: + line_count += 1 + else: + #URL: row[0] + #relativePath: row[1] + print(row[1]) + absolutePath = pathToRootDirectory+row[1] + fullText = open(absolutePath).read() + fullText = fullText.replace(row[0], row[0].replace('http', 'https')) + f = open(absolutePath, 'w') + f.write(fullText) + f.close() + print("Altered {} in file: {}".format(row[0], absolutePath)) + line_count += 1 + print(f'Processed {line_count} URLs.') + +def main(): + if len(sys.argv) < 3: + print("Usage: python3 changeHttpURLsToHttps.py [PATH_TO_Report_FindHttpURLs.txt] [PATH_TO_ORIGINAL_NIMBUSML_DIRECTORY]") + exit(1) + changeUrls(sys.argv[1], sys.argv[2]) + + +if __name__ == "__main__": + main() diff --git a/src/python/tools/findHttpURLs.py b/src/python/tools/findHttpURLs.py new file mode 100644 index 00000000..823f6966 --- /dev/null +++ b/src/python/tools/findHttpURLs.py @@ -0,0 +1,103 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# ------------------------------------------------------------------------- + +# Finds all HTTP URLs found in the NimbusML repository +# Converts all valid HTTP links to HTTPS +# Usage: python3 findHttpURLs.py [PATH_TO_NimbusML_REPOSITORY] +# Output: Report_AlterableUrls_FindHttpURLs.csv, [Report_NonAlterableUrls_FindHttpURLs.csv, Report_InvalidUrls_FindHttpURLs.csv] + +# Required non-standard pip library: urlextract + +import sys +import os +import requests +import csv +import collections +import pathlib +from urlextract import URLExtract + +def addToDictionary(dict, key, value): + if key not in dict: + dict[key] = [value] + else: + if value not in dict[key]: + dict[key].append(value) + return dict + +def findHttpUrls(searchRootDirectory): + alterableUrlsStore = {} + nonAlterableUrlsStore = {} + invalidUrlsStore = {} + extractor = URLExtract() + lengthOfOriginalRootPath = -1 + for root, _, files in os.walk(searchRootDirectory, onerror=None): + if lengthOfOriginalRootPath == -1: + lengthOfOriginalRootPath = len(root) + for filename in files: + if pathlib.Path(filename).suffix in ['.props', '.pyproj', '.vcxproj', '.snk'] or '.git' in root: + continue + absoluteFilePath = os.path.join(root, filename) + relativeFilePath = '.' + absoluteFilePath[lengthOfOriginalRootPath:] + try: + with open(absoluteFilePath, "rb") as f: + data = f.read() + try: + data = data.decode("utf-8") + except Exception as e: + print("Unable to decodefile: {} in UTF-8 Encoding.".format(relativeFilePath)) + print(str(e)) + continue + currentUrlList = extractor.find_urls(data) + currentUrlList = [url for url in currentUrlList if url[:5] == "http:"] + for selectedUrl in currentUrlList: + try: + request = requests.get(selectedUrl) + if request.status_code == 200: + changedSelectedUrl = selectedUrl.replace("http", "https") + try: + newRequest = requests.get(changedSelectedUrl) + if newRequest.status_code == 200: + alterableUrlsStore = addToDictionary(alterableUrlsStore, selectedUrl, relativeFilePath) + else: + nonAlterableUrlsStore = addToDictionary(nonAlterableUrlsStore, selectedUrl, relativeFilePath) + except: + nonAlterableUrlsStore = addToDictionary(nonAlterableUrlsStore, selectedUrl, relativeFilePath) + else: + invalidUrlsStore = addToDictionary(invalidUrlsStore, selectedUrl, relativeFilePath) + except ConnectionError: + invalidUrlsStore = addToDictionary(invalidUrlsStore, selectedUrl, relativeFilePath) + except (IOError, OSError): + pass + makeReports(alterableUrlsStore, nonAlterableUrlsStore, invalidUrlsStore) + +def makeReports(alterableUrlsStore, nonAlterableUrlsStore, invalidUrlsStore): + with open('Report_AlterableUrls_FindHttpURLs.csv', mode='w', newline='') as csv_file: + writer1 = csv.writer(csv_file, delimiter='\t', quotechar='|', quoting=csv.QUOTE_MINIMAL) + writer1.writerow(["url", "relativeFilepath"]) + for urlKey in alterableUrlsStore: + for fileValue in alterableUrlsStore[urlKey]: + writer1.writerow([urlKey, fileValue]) + with open('Report_NonAlterableUrls_FindHttpURLs.csv', mode='w', newline='') as csv_file: + writer2 = csv.writer(csv_file, delimiter='\t', quotechar='|', quoting=csv.QUOTE_MINIMAL) + writer2.writerow(["url", "relativeFilepath"]) + for urlKey in nonAlterableUrlsStore: + for fileValue in nonAlterableUrlsStore[urlKey]: + writer2.writerow([urlKey, fileValue]) + with open('Report_InvalidUrls_FindHttpURLs.csv', mode='w', newline='') as csv_file: + writer3 = csv.writer(csv_file, delimiter='\t', quotechar='|', quoting=csv.QUOTE_MINIMAL) + writer3.writerow(["url", "relativeFilepath"]) + for urlKey in invalidUrlsStore: + for fileValue in invalidUrlsStore[urlKey]: + writer3.writerow([urlKey, fileValue]) + return + +def main(): + if len(sys.argv) < 2: + print("Usage: python3 findHttpURLs.py [PATH_TO_NimbusML_REPOSITORY]") + exit(1) + findHttpUrls(sys.argv[1]) + +if __name__ == "__main__": + main()