From 5eef6ac3bd083ce60a4da3d9967f526f98816eb6 Mon Sep 17 00:00:00 2001 From: kennethmhc Date: Thu, 19 May 2022 09:19:46 +0200 Subject: [PATCH] [HOPSWORKS-3125] feature view (#1089) * [HOPSWORKS-3125] Feature view (#923) * [HOPSWORKS-2945] [FeatureView] Implement activity endpoints (#829) * init * big fixes * Update ActivityResource.java * Update FeatureViewController.java * Update FeatureViewController.java * add logging of activity for FV * Update feature_store_activity_spec.rb * Update feature_store_activity_spec.rb * Update featurestore_helper.rb * Update featurestore_helper.rb * Update FeatureViewController.java * add statistics * [HOPSWORKS-2947] [ModelFeature] Implement PrepareStatementResource (#824) * init * improvements * small changes to the backend * temp * Update FeatureViewController.java * Update preparedstatements_spec.rb * standardize * Update featurestore_helper.rb * small updates * small changes * Update preparedstatements_spec.rb * addressing feedback * change endpoint to lower case * [HOPSWORKS-2943] Add query resource (#834) * implement batch query endpoint * handle empty start or end time * throw feature store exception * return original query * check nested join * fix NPE for join * add feature group to feature * return features properly. 
* add IT for batch query * fix batch query test * set commit time to query * add timetravel condition * reuse get_query from TrainingDatasetController * pass withLabel and isHiveEngine when constructing batch query * reformat * test get query * add test for get query * remove object variables * fix indentation * refactor getJoinsSorted * fix format * reformat * use getSingleByNameVersionAndFeatureStore * add query filter to FV * refactor event time filter, add unit test * fix resourcerequest NPE * add featureview to trainingdatasetfilter * add query filter to IT Co-authored-by: Kenneth Mak * [HOPSWORKS-2944] Implement keyword related endpoints (#842) * needs inode to function * add inode * Update FeatureView.java * Update FeatureViewController.java * Update ProjectController.java * changes with inode * Update FeatureViewController.java * inode work * Update FeatureViewController.java * Update FeatureViewController.java * Update FeatureViewController.java * Update FeatureViewController.java * Update ProjectController.java * permissions * small changes * tests * Update featureview_keywords_spec.rb * some feedback addressed * add featureView xattr * Update FeatureViewBuilder.java * Update FeatureViewController.java * Update FeatureViewController.java * Update FeatureViewController.java * save hopsfs connector * Update ProjectController.java * Update FeatureViewController.java * Update FeatureViewController.java * add features to featureViewDTO * Update FeatureViewBuilder.java * Update HopsFSProvenanceController.java * removing fv dir * Update FeaturestoreController.java * path changes for fv * restructure fv path * Update HopsFSProvenanceController.java * Update HopsworksJAXBContext.java * Update featureview_keywords_spec.rb * some of the comments addressed * remove keyword controller duplication * make createFeatureView use and return FeatureView instead of FeatureViewDTO * Update FeatureViewBuilder.java * Update FeaturestoreKeywordResource.java * change feature view 
path * Update featureview_keywords_spec.rb * Update featureview_keywords_spec.rb * [HOPSWORKS-2946] Add transformation resource and statistics resource (#856) * get transformation function * statistics resource * fix statistics dto forTransformation * fix transformation uri * add IT for transformation * change statistics folder name * remove td from feature view statistics * delete statistics along with feature view * add IT for feature view statistics * Revert "add IT for feature view statistics" This reverts commit db49dd0971fe1cdc7929f8fd0c13a5249c451ff0. * Revert "delete statistics along with feature view" This reverts commit 697768d51b186c14e2d451858f0552df1d8ffa77. * Revert "change statistics folder name" This reverts commit 922c17f94c14c50a771b7d70f04154712305ab19. * Revert "fix statistics dto forTransformation" This reverts commit 3143984906d6cc8eced6fb3349d24c4a0a4df360. * Revert "statistics resource" This reverts commit 7361bd8ff06ffff239285bfb8360f25ba14ef68a. * fix access control * refactor uri * [FeatureView] return original query as expansion (#866) * [HOPSWORKS-2942] Implement TagResource (#864) * init * Update TagResource.java * Update TagResource.java * bug fixing/standardization * tests * generic tags resource * fix for the abstract tags resource * some feedback addressed * Update TagsResource.java Co-authored-by: Alex Ormenisan * [HOPSWORKS-3064] [FeatureView] Allow update of metadata in FeatureView resource (#895) * init * Update FeatureViewResource.java * bug fixing * Update featureview_spec.rb * Update featureview_spec.rb * [HOPSWORKS-2941] Implement Training Dataset resource (#845) * implement batch query endpoint * throw feature store exception * return features properly. 
* add IT for batch query * fix batch query test * reuse get_query from TrainingDatasetController * create td * compute td * get td * delete td * delete td data only * fix compile * reformat * set feature view * set featurestore and project * implement statistics resource * handle hopsfstrainingdataset is null * return all td * fix create and get td * do not return query in dto * skip feature in dto * set query in job config * add td IT * add td IT * fix internal td it * external td IT * add external td test * rename create td method * reformat * move query related methods to QueryController * revert unintended changes * refactor get FV * fix failed tests * fix comments * check name of training dataset in IT * check name of feature view against td * keep td folder when deleting data only * fix failed test * fix failed test * remove extra update * return features after creating fv * fix td exist error * create feature view td job * do not assume a split if splits is empty * remove extra get annotation * set default td name = fv name + version * return batch query as object * set start time and end time to td * return feature group in correct type * throw exception if feature not found * fix test * fix feature to featuredto map * fix query test * [Append] training dataset resource remove redundant lines in test * rebase * [Hopsworks-3063] [FeatureView] Allow update of metadata in TrainingDataset resource (#921) * init * Update featurestore_helper.rb * Update featureview_trainingdataset_spec.rb * remove label from feature view table (#924) * [HOPSWORKS-3073] [FeatureView] Remove ResourceRequest from FeatureViewController (#925) * init small changes also on other endpoints to standardize * addressing the feedback * [HOPSWORKS-2941] [Append] td resource (#930) * fix dataset * fix infinite loop when getting all feature groups * change event time type * use date as event time in batch query * add in memory td * fix prepared statement comment * refactor commit time * fix 
keyword resource * fix transformation * remove unused import * assign storage connector to in-memory td * fix integration test Co-authored-by: Ralf Co-authored-by: Kenneth Mak Co-authored-by: Alex Ormenisan (cherry picked from commit af28c64821fd80f63ded0841b7b3770385fb3445) # Conflicts: # featurestore-ee/src/main/java/io/hops/hopsworks/featurestore/KeywordController.java # hopsworks-IT/src/test/ruby/spec/ee_tags_spec.rb # hopsworks-IT/src/test/ruby/spec/helpers/tag_helper.rb # hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewResource.java # hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewService.java # hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewTagResource.java # hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/preparestatement/PreparedStatementResource.java # hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/query/QueryResource.java # hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/TrainingDatasetResource.java # hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/transformation/TransformationResource.java * [HOPSWORKS-3125] [append] Feature view fix end time error #944 (cherry picked from commit 3c064ff6453d07feb0e5b3dd7e4c10f82c02d697) --- .../ruby/spec/feature_store_activity_spec.rb | 32 + .../src/test/ruby/spec/featureview_query.rb | 138 ++++ .../src/test/ruby/spec/featureview_spec.rb | 68 ++ .../spec/featureview_trainingdataset_spec.rb | 621 ++++++++++++++++++ .../ruby/spec/featureview_transformation.rb | 63 ++ .../ruby/spec/helpers/featurestore_helper.rb | 219 +++++- .../spec/helpers/storage_connector_helper.rb | 4 + .../test/ruby/spec/preparedstatements_spec.rb | 410 ++++++++++++ .../FeaturestoreKeywordBuilder.java | 22 +- .../FeaturestoreKeywordResource.java | 60 +- .../api/featurestore/FsQueryBuilder.java | 10 +- .../activities/ActivityBuilder.java | 64 ++ 
.../activities/ActivityResource.java | 13 +- .../featureview/FeatureViewBuilder.java | 47 +- .../featureview/FeatureViewController.java | 244 +++---- .../featureview/FeatureViewResource.java | 115 +++- .../featureview/FeatureViewService.java | 74 ++- .../featureview/FeatureViewTagResource.java | 24 +- .../PreparedStatementResource.java | 42 +- .../api/featurestore/query/QueryResource.java | 94 ++- .../statistics/StatisticsBuilder.java | 33 + .../statistics/StatisticsResource.java | 4 + .../PreparedStatementBuilder.java | 174 +++-- .../TrainingDatasetResource.java | 268 +++++++- .../TrainingDatasetService.java | 4 - .../TransformationResource.java | 54 +- .../TransformationFunctionBuilder.java | 56 +- .../hopsworks/common/api/ResourceRequest.java | 5 +- .../dao/user/activity/ActivityFacade.java | 4 +- .../featurestore/FeaturestoreController.java | 6 +- .../featurestore/FeaturestoreEntityDTO.java | 3 +- .../activity/FeaturestoreActivityFacade.java | 38 +- .../app/FsJobManagerController.java | 61 +- .../featuregroup/FeaturegroupController.java | 2 +- .../featuregroup/FeaturegroupDTO.java | 2 +- .../featureview/FeatureViewDTO.java | 10 - .../featureview/FeatureViewFacade.java | 6 +- .../keyword/KeywordControllerIface.java | 21 +- .../common/featurestore/query/Feature.java | 1 + .../common/featurestore/query/Query.java | 18 - .../featurestore/query/QueryBuilder.java | 237 +++++++ .../featurestore/query/QueryController.java | 105 +++ .../query/filter/FilterController.java | 18 +- .../query/pit/PitJoinController.java | 3 +- .../columns/StatisticColumnController.java | 11 +- .../trainingdatasets/DateAdapter.java | 33 + .../TrainingDatasetController.java | 378 ++++++++--- .../trainingdatasets/TrainingDatasetDTO.java | 37 +- .../TrainingDatasetDTOBuilder.java | 65 ++ .../TrainingDatasetFacade.java | 82 ++- .../TrainingDatasetInputValidation.java | 7 +- .../HopsfsTrainingDatasetController.java | 17 +- .../xattr/dto/FeatureViewXAttrDTO.java | 96 +++ 
.../dto/FeaturestoreXAttrsConstants.java | 1 + .../common/project/ProjectController.java | 2 +- .../core/HopsFSProvenanceController.java | 48 +- .../common/util/HopsworksJAXBContext.java | 4 +- .../query/TestQueryController.java | 43 ++ .../activity/FeaturestoreActivity.java | 9 + .../activity/FeaturestoreActivityMeta.java | 3 +- .../featurestore/featureview/FeatureView.java | 49 +- .../trainingdataset/TrainingDataset.java | 80 ++- .../TrainingDatasetFilter.java | 18 + .../trainingdataset/TrainingDatasetType.java | 2 + .../hops/hopsworks/restutils/RESTCodes.java | 6 +- 65 files changed, 3900 insertions(+), 588 deletions(-) create mode 100644 hopsworks-IT/src/test/ruby/spec/featureview_query.rb create mode 100644 hopsworks-IT/src/test/ruby/spec/featureview_spec.rb create mode 100644 hopsworks-IT/src/test/ruby/spec/featureview_trainingdataset_spec.rb create mode 100644 hopsworks-IT/src/test/ruby/spec/featureview_transformation.rb create mode 100644 hopsworks-IT/src/test/ruby/spec/preparedstatements_spec.rb create mode 100644 hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/QueryBuilder.java create mode 100644 hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/DateAdapter.java create mode 100644 hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetDTOBuilder.java create mode 100644 hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/xattr/dto/FeatureViewXAttrDTO.java diff --git a/hopsworks-IT/src/test/ruby/spec/feature_store_activity_spec.rb b/hopsworks-IT/src/test/ruby/spec/feature_store_activity_spec.rb index 373cfca06e..414a313b05 100644 --- a/hopsworks-IT/src/test/ruby/spec/feature_store_activity_spec.rb +++ b/hopsworks-IT/src/test/ruby/spec/feature_store_activity_spec.rb @@ -206,5 +206,37 @@ expect(activity["items"][0]["type"]).to eql("METADATA") expect(activity["items"][0]["metadata"]).to eql("The training dataset was created") end + + it 
"should be able to retrieve feature view creation event" do + featurestore_id = get_featurestore_id(@project[:id]) + json_result, _ = create_cached_featuregroup(@project[:id], featurestore_id) + expect_status_details(201) + + parsed_json = JSON.parse(json_result) + fg_id = parsed_json["id"] + # create queryDTO object + query = { + leftFeatureGroup: { + id: fg_id + }, + leftFeatures: ['testfeature'].map do |feat_name| + {name: feat_name} + end, + joins: [] + } + + json_result, _ = create_feature_view(@project.id, featurestore_id, query) + parsed_json = JSON.parse(json_result) + expect_status(201) + + feature_view_name = parsed_json["name"] + feature_view_version = parsed_json["version"] + json_result = get "#{ENV['HOPSWORKS_API']}/project/#{@project.id}/featurestores/#{featurestore_id}/featureview/#{feature_view_name}/version/#{feature_view_version}/activity" + expect_status_details(200) + + activity = JSON.parse(response.body) + expect(activity["items"][0]["type"]).to eql("METADATA") + expect(activity["items"][0]["metadata"]).to eql("The feature view was created") + end end end \ No newline at end of file diff --git a/hopsworks-IT/src/test/ruby/spec/featureview_query.rb b/hopsworks-IT/src/test/ruby/spec/featureview_query.rb new file mode 100644 index 0000000000..9d42d89ac5 --- /dev/null +++ b/hopsworks-IT/src/test/ruby/spec/featureview_query.rb @@ -0,0 +1,138 @@ +# This file is part of Hopsworks +# Copyright (C) 2022, Logical Clocks AB. All rights reserved +# +# Hopsworks is free software: you can redistribute it and/or modify it under the terms of +# the GNU Affero General Public License as published by the Free Software Foundation, +# either version 3 of the License, or (at your option) any later version. +# +# Hopsworks is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +# PURPOSE. See the GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License along with this program. +# If not, see . + +require 'json' + +describe "On #{ENV['OS']}" do + after(:all) { clean_all_test_projects(spec: "featureviewquery") } + + describe "feature view query" do + describe "internal" do + context 'with valid project, featurestore service enabled' do + before :all do + with_valid_project + end + + it "should be able to create batch query" do + featurestore_id = get_featurestore_id(@project.id) + featurestore_name = get_featurestore_name(@project.id) + featuregroup_suffix = short_random_id + project_name = @project.projectname.downcase + query = make_sample_query(@project, featurestore_id, featuregroup_suffix: featuregroup_suffix) + json_result, _ = create_feature_view(@project.id, featurestore_id, query) + parsed_json = JSON.parse(json_result) + expect_status(201) + + feature_view_name = parsed_json["name"] + feature_view_version = parsed_json["version"] + query_result = get "#{ENV['HOPSWORKS_API']}/project/#{@project.id}/featurestores/#{featurestore_id}/featureview/#{feature_view_name}/version/#{feature_view_version}/query/batch?start_time=1234&end_time=4321" + expect_status_details(200) + + parsed_query_result = JSON.parse(query_result) + expect(parsed_query_result["featureStoreId"]).to eql(featurestore_id) + expect(parsed_query_result["featureStoreName"]).to eql(featurestore_name) + expect(parsed_query_result["leftFeatureGroup"]["id"]).to eql(query[:leftFeatureGroup][:id]) + expect(parsed_query_result["leftFeatures"][0]["name"]).to eql(query[:leftFeatures][0][:name]) + expect(parsed_query_result["leftFeatures"][1]["name"]).to eql(query[:leftFeatures][1][:name]) + expect(parsed_query_result["joins"][0]["query"]["leftFeatureGroup"]["id"]).to eql(query[:joins][0][:query][:leftFeatureGroup][:id]) + expect(parsed_query_result["joins"][0]["query"]["leftFeatures"][0]["name"]).to eql(query[:joins][0][:query][:leftFeatures][1][:name]) + # a_testfeature1 > 0 
+ expect(parsed_query_result["filter"]["leftLogic"]["leftLogic"]["leftFilter"]["feature"]["name"]).to eql(query[:filter][:leftFilter][:feature][:name]) + expect(parsed_query_result["filter"]["leftLogic"]["leftLogic"]["leftFilter"]["condition"]).to eql(query[:filter][:leftFilter][:condition]) + expect(parsed_query_result["filter"]["leftLogic"]["leftLogic"]["leftFilter"]["value"]).to eql(query[:filter][:leftFilter][:value]) + # ts <= 1234 + expect(parsed_query_result["filter"]["leftLogic"]["rightLogic"]["leftFilter"]["feature"]["name"]).to eql("ts") + expect(parsed_query_result["filter"]["leftLogic"]["rightLogic"]["leftFilter"]["condition"]).to eql("GREATER_THAN_OR_EQUAL") + expect(parsed_query_result["filter"]["leftLogic"]["rightLogic"]["leftFilter"]["value"]).to eql("1234") + # ts >= 4321 + expect(parsed_query_result["filter"]["rightLogic"]["leftFilter"]["feature"]["name"]).to eql("ts") + expect(parsed_query_result["filter"]["rightLogic"]["leftFilter"]["condition"]).to eql("LESS_THAN_OR_EQUAL") + expect(parsed_query_result["filter"]["rightLogic"]["leftFilter"]["value"]).to eql("4321") + end + + it "should be able to retrieve original query" do + featurestore_id = get_featurestore_id(@project.id) + featurestore_name = get_featurestore_name(@project.id) + project_name = @project.projectname.downcase + featuregroup_suffix = short_random_id + query = make_sample_query(@project, featurestore_id, featuregroup_suffix: featuregroup_suffix) + json_result, _ = create_feature_view(@project.id, featurestore_id, query) + parsed_json = JSON.parse(json_result) + expect_status(201) + + feature_view_name = parsed_json["name"] + feature_view_version = parsed_json["version"] + query_result = get "#{ENV['HOPSWORKS_API']}/project/#{@project.id}/featurestores/#{featurestore_id}/featureview/#{feature_view_name}/version/#{feature_view_version}/query" + expect_status_details(200) + parsed_query_result = JSON.parse(query_result) + expect(parsed_query_result["featureStoreId"]).to 
eql(featurestore_id) + expect(parsed_query_result["featureStoreName"]).to eql(featurestore_name) + expect(parsed_query_result["leftFeatureGroup"]["id"]).to eql(query[:leftFeatureGroup][:id]) + expect(parsed_query_result["leftFeatures"][0]["name"]).to eql(query[:leftFeatures][0][:name]) + expect(parsed_query_result["leftFeatures"][1]["name"]).to eql(query[:leftFeatures][1][:name]) + expect(parsed_query_result["joins"][0]["query"]["leftFeatureGroup"]["id"]).to eql(query[:joins][0][:query][:leftFeatureGroup][:id]) + expect(parsed_query_result["joins"][0]["query"]["leftFeatures"][0]["name"]).to eql(query[:joins][0][:query][:leftFeatures][1][:name]) + expect(parsed_query_result["filter"]["leftFilter"]["feature"]["name"]).to eql(query[:filter][:leftFilter][:feature][:name]) + expect(parsed_query_result["filter"]["leftFilter"]["condition"]).to eql(query[:filter][:leftFilter][:condition]) + expect(parsed_query_result["filter"]["leftFilter"]["value"]).to eql(query[:filter][:leftFilter][:value]) + end + + it "should be able to create batch query using retrieved query" do + featurestore_id = get_featurestore_id(@project.id) + featurestore_name = get_featurestore_name(@project.id) + project_name = @project.projectname.downcase + featuregroup_suffix = short_random_id + query = make_sample_query(@project, featurestore_id, featuregroup_suffix: featuregroup_suffix) + json_result, _ = create_feature_view(@project.id, featurestore_id, query) + expect_status(201) + parsed_json = JSON.parse(json_result) + + feature_view_name = parsed_json["name"] + feature_view_version = parsed_json["version"] + query_result = get "#{ENV['HOPSWORKS_API']}/project/#{@project.id}/featurestores/#{featurestore_id}/featureview/#{feature_view_name}/version/#{feature_view_version}/query" + expect_status_details(200) + parsed_query_result = JSON.parse(query_result) + json_result, _ = create_feature_view(@project.id, featurestore_id, parsed_query_result) + expect_status(201) + parsed_json_new = 
JSON.parse(json_result) + feature_view_version_new = parsed_json_new["version"] + + query_result = get "#{ENV['HOPSWORKS_API']}/project/#{@project.id}/featurestores/#{featurestore_id}/featureview/#{feature_view_name}/version/#{feature_view_version_new}/query/batch?start_time=1234&end_time=4321" + expect_status_details(200) + + parsed_query_result = JSON.parse(query_result) + expect(parsed_query_result["featureStoreId"]).to eql(featurestore_id) + expect(parsed_query_result["featureStoreName"]).to eql(featurestore_name) + expect(parsed_query_result["leftFeatureGroup"]["id"]).to eql(query[:leftFeatureGroup][:id]) + expect(parsed_query_result["leftFeatures"][0]["name"]).to eql(query[:leftFeatures][0][:name]) + expect(parsed_query_result["leftFeatures"][1]["name"]).to eql(query[:leftFeatures][1][:name]) + expect(parsed_query_result["joins"][0]["query"]["leftFeatureGroup"]["id"]).to eql(query[:joins][0][:query][:leftFeatureGroup][:id]) + expect(parsed_query_result["joins"][0]["query"]["leftFeatures"][0]["name"]).to eql(query[:joins][0][:query][:leftFeatures][1][:name]) + # a_testfeature1 > 0 + expect(parsed_query_result["filter"]["leftLogic"]["leftLogic"]["leftFilter"]["feature"]["name"]).to eql(query[:filter][:leftFilter][:feature][:name]) + expect(parsed_query_result["filter"]["leftLogic"]["leftLogic"]["leftFilter"]["condition"]).to eql(query[:filter][:leftFilter][:condition]) + expect(parsed_query_result["filter"]["leftLogic"]["leftLogic"]["leftFilter"]["value"]).to eql(query[:filter][:leftFilter][:value]) + # ts <= 1234 + expect(parsed_query_result["filter"]["leftLogic"]["rightLogic"]["leftFilter"]["feature"]["name"]).to eql("ts") + expect(parsed_query_result["filter"]["leftLogic"]["rightLogic"]["leftFilter"]["condition"]).to eql("GREATER_THAN_OR_EQUAL") + expect(parsed_query_result["filter"]["leftLogic"]["rightLogic"]["leftFilter"]["value"]).to eql("1234") + # ts >= 4321 + expect(parsed_query_result["filter"]["rightLogic"]["leftFilter"]["feature"]["name"]).to 
eql("ts") + expect(parsed_query_result["filter"]["rightLogic"]["leftFilter"]["condition"]).to eql("LESS_THAN_OR_EQUAL") + expect(parsed_query_result["filter"]["rightLogic"]["leftFilter"]["value"]).to eql("4321") + end + end + end + end +end diff --git a/hopsworks-IT/src/test/ruby/spec/featureview_spec.rb b/hopsworks-IT/src/test/ruby/spec/featureview_spec.rb new file mode 100644 index 0000000000..17bc13b10e --- /dev/null +++ b/hopsworks-IT/src/test/ruby/spec/featureview_spec.rb @@ -0,0 +1,68 @@ +# This file is part of Hopsworks +# Copyright (C) 2022, Logical Clocks AB. All rights reserved +# +# Hopsworks is free software: you can redistribute it and/or modify it under the terms of +# the GNU Affero General Public License as published by the Free Software Foundation, +# either version 3 of the License, or (at your option) any later version. +# +# Hopsworks is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +# PURPOSE. See the GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License along with this program. +# If not, see . 
+ +require 'json' + +describe "On #{ENV['OS']}" do + after(:all) {clean_all_test_projects(spec: "featureview")} + + describe "feature view" do + describe "internal" do + context 'with valid project, featurestore service enabled' do + before :all do + with_valid_project + end + + it "should not be able to update feature view that doesnt exist" do + featurestore_id = get_featurestore_id(@project.id) + + json_data = { + name: "feature_view_name", + version: 1, + description: "testfeatureviewdescription" + } + + update_feature_view(@project.id, featurestore_id, json_data) + expect_status(404) + end + + it "should be able to update the description of a feature view" do + featurestore_id = get_featurestore_id(@project.id) + + json_result, fg_name = create_cached_featuregroup(@project.id, featurestore_id, online:true) + parsed_json = JSON.parse(json_result) + + json_result, _ = create_feature_view_from_feature_group(@project.id, featurestore_id, parsed_json) + parsed_json = JSON.parse(json_result) + expect_status(201) + featureview_name = parsed_json["name"] + featureview_version = parsed_json["version"] + + new_description = "new_testfeatureviewdescription" + json_data = { + name: featureview_name, + version: featureview_version, + description: new_description + } + + json_result = update_feature_view(@project.id, featurestore_id, json_data) + parsed_json = JSON.parse(json_result) + expect_status(200) + + expect(parsed_json["description"]).to eql(new_description) + end + end + end + end +end diff --git a/hopsworks-IT/src/test/ruby/spec/featureview_trainingdataset_spec.rb b/hopsworks-IT/src/test/ruby/spec/featureview_trainingdataset_spec.rb new file mode 100644 index 0000000000..67a7540fa3 --- /dev/null +++ b/hopsworks-IT/src/test/ruby/spec/featureview_trainingdataset_spec.rb @@ -0,0 +1,621 @@ +# This file is part of Hopsworks +# Copyright (C) 2020, Logical Clocks AB. 
All rights reserved +# +# Hopsworks is free software: you can redistribute it and/or modify it under the terms of +# the GNU Affero General Public License as published by the Free Software Foundation, +# either version 3 of the License, or (at your option) any later version. +# +# Hopsworks is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +# PURPOSE. See the GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License along with this program. +# If not, see . + +require 'json' + +describe "On #{ENV['OS']}" do + after(:all) { clean_all_test_projects(spec: "featureview_trainingdataset") } + + describe "training dataset" do + describe "internal" do + context 'with valid project, featurestore service enabled' do + before :all do + with_valid_project + end + + it "should be able to add a hopsfs training dataset to the featurestore" do + featurestore_name = get_featurestore_name(@project.id) + all_metadata = create_featureview_training_dataset_from_project(@project) + parsed_json = all_metadata["response"] + connector = all_metadata["connector"] + featureview = all_metadata["featureView"] + + expect(parsed_json.key?("id")).to be true + expect(parsed_json.key?("featurestoreName")).to be true + expect(parsed_json.key?("name")).to be true + expect(parsed_json["creator"].key?("email")).to be true + expect(parsed_json.key?("location")).to be true + expect(parsed_json.key?("version")).to be true + expect(parsed_json.key?("dataFormat")).to be true + expect(parsed_json.key?("trainingDatasetType")).to be true + expect(parsed_json.key?("location")).to be true + expect(parsed_json.key?("inodeId")).to be true + expect(parsed_json.key?("seed")).to be true + expect(parsed_json["featurestoreName"] == featurestore_name).to be true + expect(parsed_json["name"] == 
"#{featureview['name']}_#{featureview['version']}").to be true + expect(parsed_json["trainingDatasetType"] == "HOPSFS_TRAINING_DATASET").to be true + expect(parsed_json["storageConnector"]["id"] == connector.id).to be true + expect(parsed_json["seed"] == 1234).to be true + + # Make sure the location contains the scheme (hopsfs) and the authority + uri = URI(parsed_json["location"]) + expect(uri.scheme).to eql("hopsfs") + # If the port is available we can assume that the IP is as well. + expect(uri.port).to eql(8020) + end + + it "should not be able to add a hopsfs training dataset to the featurestore without specifying a data format" do + all_metadata = create_featureview_training_dataset_from_project(@project, expected_status_code: 400, data_format: "not_exist") + parsed_json = all_metadata["response"] + expect(parsed_json.key?("errorCode")).to be true + expect(parsed_json.key?("errorMsg")).to be true + expect(parsed_json.key?("usrMsg")).to be true + expect(parsed_json["errorCode"] == 270057).to be true + end + + it "should not be able to add a hopsfs training dataset to the featurestore with an invalid version" do + all_metadata = create_featureview_training_dataset_from_project(@project, expected_status_code: 400, version: -1) + parsed_json = all_metadata["response"] + expect(parsed_json.key?("errorCode")).to be true + expect(parsed_json.key?("errorMsg")).to be true + expect(parsed_json.key?("usrMsg")).to be true + expect(parsed_json["errorCode"] == 270058).to be true + end + + it "should be able to add a new hopsfs training dataset without version to the featurestore" do + all_metadata = create_featureview_training_dataset_from_project(@project, version: nil) + parsed_json = all_metadata["response"] + expect(parsed_json["version"] == 1).to be true + end + + it "should be able to add a new version of an existing hopsfs training dataset without version to the featurestore" do + all_metadata = create_featureview_training_dataset_from_project(@project) + 
featureview = all_metadata["featureView"] + connector = all_metadata["connector"] + + # add second version + json_result, _ = create_featureview_training_dataset(@project.id, featureview, connector, + version: nil) + parsed_json = JSON.parse(json_result) + expect_status(201) + # version should be incremented to 2 + expect(parsed_json["version"] == 2).to be true + end + + it "should be able to add a hopsfs training dataset to the featurestore with splits" do + splits = [ + { + name: "test_split", + percentage: 0.8 + }, + { + name: "train_split", + percentage: 0.2 + } + ] + all_metadata = create_featureview_training_dataset_from_project(@project, splits: splits, + train_split: "train_split") + parsed_json = all_metadata["response"] + expect(parsed_json.key?("splits")).to be true + expect(parsed_json["splits"].length).to be 2 + end + + it "should not be able to add a hopsfs training dataset to the featurestore with a non numeric split percentage" do + split = [{ name: "train_split", percentage: "wrong" }] + all_metadata = create_featureview_training_dataset_from_project(@project, expected_status_code: 400, splits: split) + parsed_json = all_metadata["response"] + expect(parsed_json.key?("errorCode")).to be true + expect(parsed_json.key?("errorMsg")).to be true + expect(parsed_json.key?("usrMsg")).to be true + expect(parsed_json["errorCode"] == 270099).to be true + end + + it "should not be able to add a hopsfs training dataset to the featurestore with a illegal split name" do + split = [{ name: "ILLEGALNAME!!!", percentage: 0.8 }] + all_metadata = create_featureview_training_dataset_from_project(@project, expected_status_code: 400, splits: split) + parsed_json = all_metadata["response"] + expect(parsed_json.key?("errorCode")).to be true + expect(parsed_json.key?("errorMsg")).to be true + expect(parsed_json.key?("usrMsg")).to be true + expect(parsed_json["errorCode"] == 270098).to be true + end + + it "should not be able to add a hopsfs training dataset to the 
featurestore with splits of duplicate split + names" do + splits = [ + { + name: "test_split", + percentage: 0.8 + }, + { + name: "test_split", + percentage: 0.2 + } + ] + all_metadata = create_featureview_training_dataset_from_project(@project, expected_status_code: 400, splits: splits, train_split: "test_split") + parsed_json = all_metadata["response"] + expect(parsed_json.key?("errorCode")).to be true + expect(parsed_json.key?("errorMsg")).to be true + expect(parsed_json.key?("usrMsg")).to be true + expect(parsed_json["errorCode"] == 270106).to be true + end + + it "should not be able to create a training dataset with the same name and version" do + all_metadata = create_featureview_training_dataset_from_project(@project) + parsed_json = all_metadata["response"] + featureview = all_metadata["featureView"] + connector = all_metadata["connector"] + + create_featureview_training_dataset(@project.id, featureview, connector, version: parsed_json["version"]) + expect_status(400) + end + + it "should be able to add a hopsfs training dataset to the featurestore without specifying a hopsfs connector" do + featurestore_id = get_featurestore_id(@project.id) + featuregroup_suffix = short_random_id + query = make_sample_query(@project, featurestore_id, featuregroup_suffix: featuregroup_suffix) + json_result, _ = create_feature_view(@project.id, featurestore_id, query) + expect_status(201) + featureview = JSON.parse(json_result) + td = create_featureview_training_dataset(@project.id, featureview, nil) + parsed_json = JSON.parse(td) + expect(parsed_json["storageConnector"]["name"] == "#{@project['projectname']}_Training_Datasets") + end + + it "should be able to delete a hopsfs training dataset" do + all_metadata = create_featureview_training_dataset_from_project(@project) + parsed_json = all_metadata["response"] + featureview = all_metadata["featureView"] + + delete_featureview_training_dataset(@project, featureview, version: parsed_json["version"]) + end + + it "should be 
able to delete all hopsfs training dataset" do + all_metadata = create_featureview_training_dataset_from_project(@project) + parsed_json = all_metadata["response"] + featureview = all_metadata["featureView"] + connector = all_metadata["connector"] + create_featureview_training_dataset(@project.id, featureview, connector, version: nil) + + delete_featureview_training_dataset(@project, featureview) + end + + it "should be able to delete a hopsfs training dataset (data only)" do + all_metadata = create_featureview_training_dataset_from_project(@project) + parsed_json = all_metadata["response"] + featureview = all_metadata["featureView"] + + delete_featureview_training_dataset_data_only(@project, featureview, version: parsed_json["version"]) + end + + it "should be able to delete all hopsfs training dataset (data only)" do + all_metadata = create_featureview_training_dataset_from_project(@project) + parsed_json = all_metadata["response"] + featureview = all_metadata["featureView"] + connector = all_metadata["connector"] + create_featureview_training_dataset(@project.id, featureview, connector, version: nil) + + delete_featureview_training_dataset_data_only(@project, featureview) + end + + it "should not be able to update the metadata of a hopsfs training dataset from the featurestore" do + all_metadata = create_featureview_training_dataset_from_project(@project) + parsed_json = all_metadata["response"] + featureview = all_metadata["featureView"] + + json_data = { + name: "new_testtrainingdatasetname", + dataFormat: "petastorm" + } + + json_result2 = update_featureview_training_dataset_metadata(@project, featureview, parsed_json["version"], json_data) + parsed_json2 = JSON.parse(json_result2) + expect_status(200) + expect(parsed_json2.key?("id")).to be true + expect(parsed_json2.key?("name")).to be true + expect(parsed_json2["creator"].key?("email")).to be true + expect(parsed_json2.key?("location")).to be true + expect(parsed_json2.key?("version")).to be true + 
expect(parsed_json2.key?("dataFormat")).to be true + expect(parsed_json2.key?("trainingDatasetType")).to be true + expect(parsed_json2.key?("inodeId")).to be true + + expect(parsed_json2["version"]).to eql(parsed_json["version"]) + # make sure the dataformat didn't change + expect(parsed_json2["dataFormat"] == "tfrecords").to be true + end + + it "should not be able to update the name of a training dataset" do + all_metadata = create_featureview_training_dataset_from_project(@project) + parsed_json = all_metadata["response"] + featureview = all_metadata["featureView"] + + json_data = { + name: "new_testtrainingdatasetname" + } + + json_result2 = update_featureview_training_dataset_metadata(@project, featureview, parsed_json["version"], json_data) + parsed_json2 = JSON.parse(json_result2) + expect_status(200) + + expect(parsed_json2["version"]).to eql(parsed_json["version"]) + # make sure the name didn't change + expect(parsed_json2["name"]).to eql(parsed_json["name"]) + end + + it "should be able to update the description of a training dataset" do + all_metadata = create_featureview_training_dataset_from_project(@project) + parsed_json = all_metadata["response"] + featureview = all_metadata["featureView"] + + json_data = { + name: "new_testtrainingdatasetname", + description: "new_testtrainingdatasetdescription" + } + + json_result2 = update_featureview_training_dataset_metadata(@project, featureview, parsed_json["version"], json_data) + parsed_json2 = JSON.parse(json_result2) + expect_status(200) + + expect(parsed_json2["description"]).to eql("new_testtrainingdatasetdescription") + expect(parsed_json2["version"]).to eql(parsed_json["version"]) + # make sure the name didn't change + expect(parsed_json2["name"]).to eql(parsed_json["name"]) + end + + it "should be able to get a list of training dataset versions based on the version" do + all_metadata = create_featureview_training_dataset_from_project(@project) + parsed_json = all_metadata["response"] + featureview = 
all_metadata["featureView"] + connector = all_metadata["connector"] + create_featureview_training_dataset(@project.id, featureview, connector, version: nil) + + json_result = get_featureview_training_dataset(@project, featureview) + expect_status(200) + parsed_json = JSON.parse(json_result) + expect(parsed_json["count"]).to eq 2 + end + + it "should be able to get a training dataset based on version" do + all_metadata = create_featureview_training_dataset_from_project(@project) + parsed_json = all_metadata["response"] + featureview = all_metadata["featureView"] + connector = all_metadata["connector"] + create_featureview_training_dataset(@project.id, featureview, connector, version: nil) + + json_result = get_featureview_training_dataset(@project, featureview, version: 1) + parsed_json = JSON.parse(json_result) + expect(parsed_json['version']).to be 1 + expect(parsed_json['name']).to eq "#{featureview['name']}_#{featureview['version']}" + + json_result = get_featureview_training_dataset(@project, featureview, version: 2) + parsed_json = JSON.parse(json_result) + expect(parsed_json['version']).to be 2 + expect(parsed_json['name']).to eq "#{featureview['name']}_#{featureview['version']}" + end + + it "should be able to attach keywords" do + # TODO: keyword not implemented yet for TD + end + + it "should fail to attach invalid keywords" do + # TODO: keyword not implemented yet for TD + end + + it "should be able to remove keyword" do + # TODO: keyword not implemented yet for TD + end + + it "should be able to create a training dataset without statistics settings to test the defaults" do + all_metadata = create_featureview_training_dataset_from_project(@project) + parsed_json = all_metadata["response"] + expect(parsed_json.key?("statisticsConfig")).to be true + expect(parsed_json["statisticsConfig"].key?("histograms")).to be true + expect(parsed_json["statisticsConfig"].key?("correlations")).to be true + expect(parsed_json["statisticsConfig"].key?("exactUniqueness")).to be 
true + expect(parsed_json["statisticsConfig"].key?("enabled")).to be true + expect(parsed_json["statisticsConfig"].key?("columns")).to be true + expect(parsed_json["statisticsConfig"]["columns"].length).to eql(0) + expect(parsed_json["statisticsConfig"]["enabled"]).to be true + expect(parsed_json["statisticsConfig"]["correlations"]).to be false + expect(parsed_json["statisticsConfig"]["exactUniqueness"]).to be false + expect(parsed_json["statisticsConfig"]["histograms"]).to be false + end + + it "should be able to create a training dataset with statistics settings and retrieve them back" do + stats_config = { enabled: false, histograms: false, correlations: false, exactUniqueness: false, columns: + ["a_testfeature"] } + all_metadata = create_featureview_training_dataset_from_project(@project, statistics_config: stats_config) + parsed_json = all_metadata["response"] + expect(parsed_json["statisticsConfig"]["columns"].length).to eql(1) + expect(parsed_json["statisticsConfig"]["columns"][0]).to eql("a_testfeature") + expect(parsed_json["statisticsConfig"]["enabled"]).to be false + expect(parsed_json["statisticsConfig"]["correlations"]).to be false + expect(parsed_json["statisticsConfig"]["exactUniqueness"]).to be false + expect(parsed_json["statisticsConfig"]["histograms"]).to be false + end + + it "should not be possible to add a training dataset with non-existing statistic column" do + stats_config = { enabled: false, histograms: false, correlations: false, exactUniqueness: false, columns: ["wrongname"] } + all_metadata = create_featureview_training_dataset_from_project( + @project, statistics_config: stats_config, expected_status_code: 400) + parsed_json = all_metadata["response"] + expect(parsed_json.key?("errorCode")).to be true + expect(parsed_json.key?("errorMsg")).to be true + expect(parsed_json.key?("usrMsg")).to be true + expect(parsed_json["errorCode"]).to eql(270108) + end + + it "should be able to update the statistics config of a training dataset" do + 
all_metadata = create_featureview_training_dataset_from_project(@project) + parsed_json = all_metadata["response"] + featureview = all_metadata["featureView"] + + json_data = { + statisticsConfig: { + histograms: false, + correlations: false, + exactUniqueness: false, + columns: ["a_testfeature"], + enabled: false + } + } + + json_result2 = update_featureview_training_dataset_stats_config(@project, featureview, parsed_json["version"], json_data) + parsed_json2 = JSON.parse(json_result2) + expect_status(200) + expect(parsed_json2["statisticsConfig"]["columns"].length).to eql(1) + expect(parsed_json2["statisticsConfig"]["columns"][0]).to eql("a_testfeature") + expect(parsed_json2["statisticsConfig"]["enabled"]).to be false + expect(parsed_json2["statisticsConfig"]["correlations"]).to be false + expect(parsed_json2["statisticsConfig"]["exactUniqueness"]).to be false + expect(parsed_json2["statisticsConfig"]["histograms"]).to be false + end + end + end + + describe "external" do + context 'with valid project, s3 connector, and featurestore service enabled' do + before :all do + with_valid_project + with_s3_connector(@project[:id]) + end + + it "should be able to add an external training dataset to the featurestore" do + connector = make_connector_dto(get_s3_connector_id) + all_metadata = create_featureview_training_dataset_from_project(@project, connector: connector, is_internal: false) + featureview = all_metadata["featureView"] + parsed_json = all_metadata["response"] + expect(parsed_json.key?("id")).to be true + expect(parsed_json.key?("featurestoreName")).to be true + expect(parsed_json.key?("name")).to be true + expect(parsed_json["creator"].key?("email")).to be true + expect(parsed_json.key?("location")).to be true + expect(parsed_json.key?("version")).to be true + expect(parsed_json.key?("dataFormat")).to be true + expect(parsed_json.key?("trainingDatasetType")).to be true + expect(parsed_json.key?("description")).to be true + expect(parsed_json.key?("seed")).to 
be true + expect(parsed_json["featurestoreName"] == @project.projectname.downcase + "_featurestore").to be true + expect(parsed_json["name"] == "#{featureview['name']}_#{featureview['version']}").to be true + expect(parsed_json["trainingDatasetType"] == "EXTERNAL_TRAINING_DATASET").to be true + expect(parsed_json["storageConnector"]["id"] == connector[:id]).to be true + expect(parsed_json["seed"] == 1234).to be true + end + + it "should not be able to add an external training dataset to the featurestore without specifying a s3 connector" do + featurestore_id = get_featurestore_id(@project.id) + featuregroup_suffix = short_random_id + query = make_sample_query(@project, featurestore_id, featuregroup_suffix: featuregroup_suffix) + json_result, _ = create_feature_view(@project.id, featurestore_id, query) + expect_status(201) + featureview = JSON.parse(json_result) + create_featureview_training_dataset(@project.id, featureview, nil, is_internal: false) + expect_status(404) + end + + it "should be able to add an external training dataset to the featurestore with splits" do + splits = [ + { + name: "test_split", + percentage: 0.8 + }, + { + name: "train_split", + percentage: 0.2 + } + ] + connector = make_connector_dto(get_s3_connector_id) + all_metadata = create_featureview_training_dataset_from_project( + @project, connector: connector, is_internal: false, splits: splits, train_split: "train_split") + parsed_json = all_metadata["response"] + expect(parsed_json.key?("splits")).to be true + expect(parsed_json["splits"].length).to be 2 + end + + it "should not be able to add an external training dataset to the featurestore with a non numeric split percentage" do + splits = [{ name: "train_split", percentage: "wrong" }] + connector = make_connector_dto(get_s3_connector_id) + all_metadata = create_featureview_training_dataset_from_project( + @project, connector: connector, is_internal: false, splits: splits, expected_status_code: 400) + parsed_json = 
all_metadata["response"] + expect_status(400) + expect(parsed_json.key?("errorCode")).to be true + expect(parsed_json.key?("errorMsg")).to be true + expect(parsed_json.key?("usrMsg")).to be true + expect(parsed_json["errorCode"] == 270099).to be true + end + + it "should not be able to add an external training dataset to the featurestore with splits of + duplicate split names" do + splits = [ + { + name: "test_split", + percentage: 0.8 + }, + { + name: "test_split", + percentage: 0.2 + } + ] + connector = make_connector_dto(get_s3_connector_id) + all_metadata = create_featureview_training_dataset_from_project( + @project, connector: connector, is_internal: false, splits: splits, train_split: "test_split", expected_status_code: 400) + parsed_json = all_metadata["response"] + + expect(parsed_json.key?("errorCode")).to be true + expect(parsed_json.key?("errorMsg")).to be true + expect(parsed_json.key?("usrMsg")).to be true + expect(parsed_json["errorCode"] == 270106).to be true + end + + it "should be able to delete a training dataset" do + connector = make_connector_dto(get_s3_connector_id) + all_metadata = create_featureview_training_dataset_from_project(@project, connector: connector, is_internal: false) + parsed_json = all_metadata["response"] + featureview = all_metadata["featureView"] + + delete_featureview_training_dataset(@project, featureview, version: parsed_json["version"]) + end + + it "should be able to delete all training dataset" do + connector = make_connector_dto(get_s3_connector_id) + all_metadata = create_featureview_training_dataset_from_project(@project, connector: connector, is_internal: false) + featureview = all_metadata["featureView"] + connector = all_metadata["connector"] + create_featureview_training_dataset(@project.id, featureview, connector, version: nil) + + delete_featureview_training_dataset(@project, featureview) + end + + it "should be able to delete a training dataset (data only)" do + connector = 
make_connector_dto(get_s3_connector_id) + all_metadata = create_featureview_training_dataset_from_project(@project, connector: connector, is_internal: false) + parsed_json = all_metadata["response"] + featureview = all_metadata["featureView"] + + delete_featureview_training_dataset_data_only(@project, featureview, version: parsed_json["version"]) + end + + it "should be able to delete all training dataset (data only)" do + connector = make_connector_dto(get_s3_connector_id) + all_metadata = create_featureview_training_dataset_from_project(@project, connector: connector, is_internal: false) + featureview = all_metadata["featureView"] + connector = all_metadata["connector"] + create_featureview_training_dataset(@project.id, featureview, connector, version: nil) + + delete_featureview_training_dataset_data_only(@project, featureview) + end + + it "should be able to update the metadata (description) of an external training dataset from the featurestore" do + connector = make_connector_dto(get_s3_connector_id) + all_metadata = create_featureview_training_dataset_from_project(@project, connector: connector, is_internal: false) + parsed_json = all_metadata["response"] + featureview = all_metadata["featureView"] + + json_data = { + name: "new_testtrainingdatasetname", + description: "new_testtrainingdatasetdescription" + } + + json_result2 = update_featureview_training_dataset_metadata(@project, featureview, parsed_json["version"], json_data) + parsed_json2 = JSON.parse(json_result2) + expect_status(200) + expect(parsed_json2.key?("id")).to be true + expect(parsed_json2.key?("featurestoreName")).to be true + expect(parsed_json2.key?("name")).to be true + expect(parsed_json2["creator"].key?("email")).to be true + expect(parsed_json2.key?("location")).to be true + expect(parsed_json2.key?("version")).to be true + expect(parsed_json2.key?("dataFormat")).to be true + expect(parsed_json2.key?("trainingDatasetType")).to be true + expect(parsed_json2.key?("description")).to be 
true + expect(parsed_json2["featurestoreName"] == @project.projectname.downcase + "_featurestore").to be true + expect(parsed_json2["description"] == "new_testtrainingdatasetdescription").to be true + expect(parsed_json2["trainingDatasetType"] == "EXTERNAL_TRAINING_DATASET").to be true + expect(parsed_json2["version"]).to eql(parsed_json["version"]) + end + + it "should not be able to change the storage connector" do + connector_id = get_s3_connector_id + connector = make_connector_dto(connector_id) + all_metadata = create_featureview_training_dataset_from_project(@project, connector: connector, is_internal: false) + parsed_json = all_metadata["response"] + featureview = all_metadata["featureView"] + + json_new_connector, _ = create_s3_connector(@project[:id], featureview["featurestoreId"], access_key: "test", secret_key: "test") + new_connector = JSON.parse(json_new_connector) + + json_data = { + name: "new_testtrainingdatasetname", + storageConnector: { + id: new_connector['id'] + } + } + + json_result2 = update_featureview_training_dataset_metadata(@project, featureview, parsed_json["version"], json_data) + parsed_json2 = JSON.parse(json_result2) + expect_status(200) + + expect(parsed_json2["version"]).to eql(parsed_json["version"]) + # make sure the storage connector didn't change + expect(parsed_json2["storageConnector"]["id"]).to be connector_id + end + + it "should store and return the correct path within the bucket" do + connector = make_connector_dto(get_s3_connector_id) + all_metadata = create_featureview_training_dataset_from_project(@project, connector: connector, location: "/inner/location", is_internal: false) + parsed_json = all_metadata["response"] + featureview = all_metadata["featureView"] + expect(parsed_json['location']).to eql("s3://testbucket/inner/location/#{featureview['name']}_#{featureview['version']}_1") + end + + it "should be able to create a training dataset using ADLS connector" do + project = get_project + featurestore_id = 
get_featurestore_id(project.id) + connector = create_adls_connector(project.id, featurestore_id) + connector = { "id": JSON.parse(connector)['id'] } + all_metadata = create_featureview_training_dataset_from_project(@project, connector: connector, is_internal: false, location: "/inner/location/") + parsed_json = all_metadata["response"] + featureview = all_metadata["featureView"] + expect(parsed_json['location']).to eql("abfss://containerName@accountName.dfs.core.windows.net/inner/location/#{featureview['name']}_#{featureview['version']}_1") + end + + it "should not be able to create a training dataset using a SNOWFLAKE connector" do + project = get_project + featurestore_id = get_featurestore_id(project.id) + connector = create_snowflake_connector(project.id, featurestore_id) + connector = JSON.parse(connector) + create_featureview_training_dataset_from_project(@project, connector: connector, is_internal: false, expected_status_code: 404) + end + + it "should not be able to create a training dataset using a REDSHIFT connector" do + project = get_project + featurestore_id = get_featurestore_id(project.id) + connector, _ = create_redshift_connector(project.id, featurestore_id, databasePassword: "pwdf") + connector = JSON.parse(connector) + create_featureview_training_dataset_from_project(@project, connector: connector, is_internal: false, expected_status_code: 404) + end + + it "should not be able to create a training dataset using a JDBC connector" do + project = get_project + featurestore_id = get_featurestore_id(project.id) + connector, _ = create_jdbc_connector(project.id, featurestore_id) + connector = JSON.parse(connector) + create_featureview_training_dataset_from_project(@project, connector: connector, is_internal: false, expected_status_code: 404) + end + end + end + end +end diff --git a/hopsworks-IT/src/test/ruby/spec/featureview_transformation.rb b/hopsworks-IT/src/test/ruby/spec/featureview_transformation.rb new file mode 100644 index 
0000000000..e83dc52495 --- /dev/null +++ b/hopsworks-IT/src/test/ruby/spec/featureview_transformation.rb @@ -0,0 +1,63 @@ +# This file is part of Hopsworks +# Copyright (C) 2022, Logical Clocks AB. All rights reserved +# +# Hopsworks is free software: you can redistribute it and/or modify it under the terms of +# the GNU Affero General Public License as published by the Free Software Foundation, +# either version 3 of the License, or (at your option) any later version. +# +# Hopsworks is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +# PURPOSE. See the GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License along with this program. +# If not, see . + +require 'json' + +describe "On #{ENV['OS']}" do + after(:all) { clean_all_test_projects(spec: "featureviewtransformation") } + + describe "feature view transformation" do + describe "internal" do + context 'with valid project, featurestore service enabled' do + before :all do + with_valid_project + end + + it "should be able to attach transformation function." 
do + featurestore_id = get_featurestore_id(@project.id) + featuregroup_suffix = short_random_id + query = make_sample_query(@project, featurestore_id, featuregroup_suffix: featuregroup_suffix) + + # create transformation function + json_result = register_transformation_fn(@project.id, featurestore_id) + transformation_function = JSON.parse(json_result) + expect_status(200) + + feature_schema = [ + {type: "INT", name: "a_testfeature1", featureGroupFeatureName: "a_testfeature1", label: false, transformationFunction: transformation_function}, + {type: "INT", name: "b_testfeature1", featureGroupFeatureName: "b_testfeature1", label: false, transformationFunction: transformation_function} + ] + + json_result, _ = create_feature_view(@project.id, featurestore_id, query, features: feature_schema) + parsed_json = JSON.parse(json_result) + expect_status(201) + feature_view_name = parsed_json["name"] + feature_view_version = parsed_json["version"] + + # endpoint to attach transformation functions + json_result = get "#{ENV['HOPSWORKS_API']}/project/#{@project.id}/featurestores/#{featurestore_id}/featureview/#{feature_view_name}/version/#{feature_view_version}/transformation/functions" + parsed_json = JSON.parse(json_result) + feature_transformation = parsed_json["items"].first["transformationFunction"] + expect(feature_transformation["name"]).to eql("plus_one") + expect(feature_transformation["outputType"]).to eql("FloatType()") + + feature_transformation = parsed_json["items"].second["transformationFunction"] + expect(feature_transformation["name"]).to eql("plus_one") + expect(feature_transformation["outputType"]).to eql("FloatType()") + end + + end + end + end +end diff --git a/hopsworks-IT/src/test/ruby/spec/helpers/featurestore_helper.rb b/hopsworks-IT/src/test/ruby/spec/helpers/featurestore_helper.rb index 6c4230dee6..22e499251f 100644 --- a/hopsworks-IT/src/test/ruby/spec/helpers/featurestore_helper.rb +++ b/hopsworks-IT/src/test/ruby/spec/helpers/featurestore_helper.rb 
@@ -704,6 +704,175 @@ def create_hopsfs_training_dataset(project_id, featurestore_id, hopsfs_connector [json_result, name] end + def create_feature_view(project_id, featurestore_id, query, name: nil, + version: 1, features: nil, description: nil) + + name = name == nil ? "feature_view_#{random_id}" : name + description = description == nil ? "testfeatureviewdescription" : description + + json_data = { + name: name, + version: version, + description: description, + features: features, + query: query + } + + create_featureview_endpoint = "#{ENV['HOPSWORKS_API']}/project/#{project_id.to_s}/featurestores/#{featurestore_id.to_s}/featureview" + json_result = post create_featureview_endpoint, json_data.to_json + [json_result, name] + end + + def create_feature_view_from_feature_group(project_id, featurestore_id, fg, name: nil, version: 1, description: nil) + features = fg["features"] + fg_id = fg["id"] + + query = { + leftFeatureGroup: { + id: fg_id + }, + leftFeatures: features, + joins: [] + } + + create_feature_view(project_id, featurestore_id, query, features: features, + name: name, version: version, description: description) + end + + def update_feature_view(project_id, featurestore_id, json_data) + create_featureview_endpoint = "#{ENV['HOPSWORKS_API']}/project/#{project_id.to_s}/featurestores/#{featurestore_id.to_s}/featureview" + json_result = put create_featureview_endpoint, json_data.to_json + return json_result + end + + def create_featureview_training_dataset_from_project(project, expected_status_code: 201, data_format: "tfrecords", + version: 1, splits: [], description: "testtrainingdatasetdescription", + statistics_config: nil, train_split: nil, is_internal: true, connector: nil, location: nil) + featurestore_id = get_featurestore_id(project.id) + featuregroup_suffix = short_random_id + query = make_sample_query(project, featurestore_id, featuregroup_suffix: featuregroup_suffix) + json_result, _ = create_feature_view(project.id, featurestore_id, query) + 
expect_status(201) + featureview = JSON.parse(json_result) + + if connector == nil + connector = get_hopsfs_training_datasets_connector(@project[:projectname]) + end + + json_result = create_featureview_training_dataset( + project.id, featureview, connector, version: version, splits: splits, description: description, + statistics_config: statistics_config, train_split: train_split, data_format: data_format, + is_internal: is_internal, location: location) + expect_status(expected_status_code) + parsed_json = JSON.parse(json_result) + {"response" => parsed_json, "connector" => connector, "featureView" => featureview} + end + + def create_featureview_training_dataset(project_id, featureview, hopsfs_connector, data_format: "tfrecords", + version: 1, splits: [], description: "testtrainingdatasetdescription", + statistics_config: nil, train_split: nil, query_param: nil, is_internal: true, location: nil) + trainingDatasetType = is_internal ? "HOPSFS_TRAINING_DATASET": "EXTERNAL_TRAINING_DATASET" + create_training_dataset_endpoint = "#{ENV['HOPSWORKS_API']}/project/#{project_id.to_s}/featurestores/#{featureview["featurestoreId"].to_s}" + + "/featureview/#{featureview["name"]}/version/#{featureview["version"].to_s}/trainingdatasets" + unless query_param != nil + create_training_dataset_endpoint = create_training_dataset_endpoint + "?#{query_param}" + end + json_data = { + description: description, + version: version, + dataFormat: data_format, + trainingDatasetType: trainingDatasetType, + splits: splits, + seed: 1234, + trainSplit: train_split, + location: location + } + unless statistics_config == nil + json_data[:statisticsConfig] = statistics_config + end + + unless hopsfs_connector.nil? 
+ json_data["storageConnector"] = {id: hopsfs_connector[:id]} + end + json_result = post create_training_dataset_endpoint, json_data.to_json + json_result + end + + def get_featureview_training_dataset(project, featureview, version: nil, expected_status_code: 200) + training_dataset_endpoint = "#{ENV['HOPSWORKS_API']}/project/#{project.id.to_s}" + + "/featurestores/#{featureview["featurestoreId"].to_s}/featureview/#{featureview["name"]}/version/#{featureview["version"].to_s}/trainingdatasets" + unless version == nil + training_dataset_endpoint = training_dataset_endpoint + "/version/#{version.to_s}" + end + training_datasets = get training_dataset_endpoint + expect_status(expected_status_code) + training_datasets + end + + def delete_featureview_training_dataset(project, featureview, version: nil) + training_datasets = JSON.parse(get_featureview_training_dataset(project, featureview, version: version)) + training_dataset_endpoint = "#{ENV['HOPSWORKS_API']}/project/#{project.id.to_s}" + + "/featurestores/#{featureview["featurestoreId"].to_s}/featureview/#{featureview["name"]}/version/#{featureview["version"].to_s}/trainingdatasets" + unless version == nil + training_dataset_endpoint = training_dataset_endpoint + "/version/#{version.to_s}" + end + json_result2 = delete training_dataset_endpoint + expect_status(200) + + unless version == nil + training_datasets = {"items" => [training_datasets]} + end + + # Make sure that the directory has been removed correctly + training_datasets["items"].each { |training_dataset| + get_datasets_in_path(project, + "#{project[:projectname]}_Training_Datasets/#{featureview['name']}_#{training_dataset['version'].to_s}", + query: "&type=DATASET") + expect_status(400) + } + end + + def delete_featureview_training_dataset_data_only(project, featureview, version: nil) + training_datasets = JSON.parse(get_featureview_training_dataset(project, featureview, version: version)) + training_dataset_endpoint = 
"#{ENV['HOPSWORKS_API']}/project/#{project.id.to_s}" + + "/featurestores/#{featureview["featurestoreId"].to_s}/featureview/#{featureview["name"]}/version/#{featureview["version"].to_s}/trainingdatasets" + if version == nil + training_dataset_endpoint = training_dataset_endpoint + "/data" + else + training_dataset_endpoint = training_dataset_endpoint + "/version/#{version.to_s}/data" + end + json_result2 = delete training_dataset_endpoint + expect_status(200) + + unless version == nil + training_datasets = {"items" => [training_datasets]} + end + + training_datasets["items"].each { |training_dataset| + get_datasets_in_path(project, + "#{project[:projectname]}_Training_Datasets/#{featureview['name']}_#{featureview['version']}_#{training_dataset['version'].to_s}", + query: "&type=DATASET") + expect_status(200) + } + + # should be able to retrieve metadata + get_featureview_training_dataset(project, featureview, version: version) + end + + def update_featureview_training_dataset_metadata(project, featureview, version, json_data) + training_dataset_endpoint = "#{ENV['HOPSWORKS_API']}/project/#{project.id.to_s}" + + "/featurestores/#{featureview["featurestoreId"].to_s}/featureview/#{featureview["name"]}/version/#{featureview["version"].to_s}/trainingdatasets/version/#{version.to_s}?updateMetadata=true" + json_result = put training_dataset_endpoint, json_data.to_json + return json_result + end + + def update_featureview_training_dataset_stats_config(project, featureview, version, json_data) + training_dataset_endpoint = "#{ENV['HOPSWORKS_API']}/project/#{project.id.to_s}" + + "/featurestores/#{featureview["featurestoreId"].to_s}/featureview/#{featureview["name"]}/version/#{featureview["version"].to_s}/trainingdatasets/version/#{version.to_s}?updateStatsConfig=true" + json_result = put training_dataset_endpoint, json_data.to_json + return json_result + end + def create_external_training_dataset(project_id, featurestore_id, connector_id, name: nil, location: "", splits:[], 
features: nil, train_split: nil) trainingDatasetType = "EXTERNAL_TRAINING_DATASET" @@ -924,6 +1093,7 @@ def check_trainingdataset_usage(project_id, td_id, check, fs_id: nil, fs_project end raise wait_result["ex"] unless wait_result["success"] end + def trainingdataset_usage(project_id, td_id, fs_id: nil, fs_project_id: nil, type: []) fs_project_id = project_id if fs_project_id.nil? fs_id = get_featurestore(project_id, fs_project_id: fs_project_id)["featurestoreId"] if fs_id.nil? @@ -938,6 +1108,7 @@ def trainingdataset_usage(project_id, td_id, fs_id: nil, fs_project_id: nil, typ expect_status_details(200) JSON.parse(result) end + def register_transformation_fn(project_id, featurestore_id, transformation_fn_metadata: nil) plus_one = { "name": "plus_one", @@ -955,4 +1126,50 @@ def register_transformation_fn(project_id, featurestore_id, transformation_fn_me endpoint = "#{ENV['HOPSWORKS_API']}/project/#{project_id}/featurestores/#{featurestore_id}/transformationfunctions" post endpoint, json_data end -end \ No newline at end of file + + def make_sample_query(project, featurestore_id, featuregroup_suffix: "") + features_a = [ + { type: "INT", name: "a_testfeature", primary: true }, + { type: "INT", name: "a_testfeature1" }, + { type: "BIGINT", name: "ts" }, + ] + fg_id = create_cached_featuregroup_checked(project.id, featurestore_id, "test_fg_a#{featuregroup_suffix}", + features: features_a, + event_time: "ts") + # create second feature group + features_b = [ + { type: "INT", name: "a_testfeature", primary: true }, + { type: "INT", name: "b_testfeature1" }, + { type: "BIGINT", name: "ts" }, + ] + fg_id_b = create_cached_featuregroup_checked(project.id, featurestore_id, "test_fg_b#{featuregroup_suffix}", + features: features_b, event_time: "ts") + query = { + leftFeatureGroup: { + id: fg_id + }, + leftFeatures: [{ name: 'a_testfeature' }, { name: 'a_testfeature1' }], + joins: [{ + query: { + leftFeatureGroup: { + id: fg_id_b + }, + leftFeatures: [{ name: 'a_testfeature' 
}, { name: 'b_testfeature1' }] + } + } + ], + filter: { + type: "SINGLE", + leftFilter: { + feature: { + name: "a_testfeature1", + featureGroupId: fg_id + }, + condition: "GREATER_THAN", + value: "0" + } + } + } + query + end +end diff --git a/hopsworks-IT/src/test/ruby/spec/helpers/storage_connector_helper.rb b/hopsworks-IT/src/test/ruby/spec/helpers/storage_connector_helper.rb index 8702e89fbd..2768dd8355 100644 --- a/hopsworks-IT/src/test/ruby/spec/helpers/storage_connector_helper.rb +++ b/hopsworks-IT/src/test/ruby/spec/helpers/storage_connector_helper.rb @@ -269,6 +269,10 @@ def get_s3_connector_id @s3_connector_id end + def make_connector_dto(connector_id) + {id: connector_id} + end + def with_s3_connector(project_id) featurestore_id = get_featurestore_id(project_id) json_result, _ = create_s3_connector(project_id, featurestore_id, diff --git a/hopsworks-IT/src/test/ruby/spec/preparedstatements_spec.rb b/hopsworks-IT/src/test/ruby/spec/preparedstatements_spec.rb new file mode 100644 index 0000000000..fbe0ce10fd --- /dev/null +++ b/hopsworks-IT/src/test/ruby/spec/preparedstatements_spec.rb @@ -0,0 +1,410 @@ +# This file is part of Hopsworks +# Copyright (C) 2020, Logical Clocks AB. All rights reserved +# +# Hopsworks is free software: you can redistribute it and/or modify it under the terms of +# the GNU Affero General Public License as published by the Free Software Foundation, +# either version 3 of the License, or (at your option) any later version. +# +# Hopsworks is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; +# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +# PURPOSE. See the GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License along with this program. +# If not, see . 
+ +require 'json' + +describe "On #{ENV['OS']}" do + after(:all) {clean_all_test_projects(spec: "preparedstatements")} + + describe "prepared statements" do + + describe "list" do + context 'with valid project, featurestore service enabled' do + before :all do + with_valid_project + end + + it "should be able to get a feature view serving vector" do + # create first feature group + featurestore_id = get_featurestore_id(@project.id) + project_name = @project.projectname + features = ['a', 'b', 'c', 'd'].map do |feat_name| + {type: "INT", name: feat_name} + end + features[0]['primary'] = true + json_result, fg_name = create_cached_featuregroup(@project.id, featurestore_id, features: features, + featuregroup_name: "test_fg_a_#{short_random_id}", online:true) + parsed_json = JSON.parse(json_result) + fg_id = parsed_json["id"] + # create queryDTO object + query = { + leftFeatureGroup: { + id: fg_id + }, + leftFeatures: ['d', 'c', 'a', 'b'].map do |feat_name| + {name: feat_name} + end, + joins: [] + } + + json_result, _ = create_feature_view(@project.id, featurestore_id, query) + parsed_json = JSON.parse(json_result) + expect_status(201) + + feature_view_name = parsed_json["name"] + feature_view_version = parsed_json["version"] + json_result = get "#{ENV['HOPSWORKS_API']}/project/#{@project.id}/featurestores/#{featurestore_id}/featureview/#{feature_view_name}/version/#{feature_view_version}/preparedstatement" + expect_status_details(200) + + parsed_json = JSON.parse(json_result) + expect(parsed_json["items"].first["preparedStatementParameters"].first["index"]).to eql(1) + expect(parsed_json["items"].first["preparedStatementParameters"].first["name"]).to eql("a") + expect(parsed_json["items"].first["queryOnline"]).to eql("SELECT `fg0`.`d` AS `d`, `fg0`.`c` AS `c`, `fg0`.`a` AS `a`, `fg0`.`b` AS `b`\n"+ + "FROM `#{project_name.downcase}`.`#{fg_name}_1` AS `fg0`\n" + + "WHERE `fg0`.`a` = ?") + end + + it "should be able to get a feature view serving vector in correct order" 
do + # create first feature group + featurestore_id = get_featurestore_id(@project.id) + project_name = @project.projectname + features = [ + {type: "INT", name: "a_testfeature", primary: true}, + {type: "INT", name: "a_testfeature1"}, + ] + json_result, fg_name = create_cached_featuregroup(@project.id, featurestore_id, features: features, featuregroup_name: "test_fg_a_#{short_random_id}", online:true) + parsed_json = JSON.parse(json_result) + fg_id = parsed_json["id"] + # create second feature group + features = [ + {type: "INT", name: "a_testfeature", primary: true}, + {type: "INT", name: "b_testfeature1"}, + ] + json_result_b, fg_name_b = create_cached_featuregroup(@project.id, featurestore_id, features: features, featuregroup_name: "test_fg_b_#{short_random_id}", online:true) + parsed_json_b = JSON.parse(json_result_b) + fg_id_b = parsed_json_b["id"] + # create queryDTO object + query = { + leftFeatureGroup: { + id: fg_id + }, + leftFeatures: [{name: 'a_testfeature1'}], + joins: [{ + query: { + leftFeatureGroup: { + id: fg_id_b + }, + leftFeatures: [{name: 'b_testfeature1'}] + } + } + ] + } + + json_result, _ = create_feature_view(@project.id, featurestore_id, query) + parsed_json = JSON.parse(json_result) + expect_status_details(201) + + feature_view_name = parsed_json["name"] + feature_view_version = parsed_json["version"] + json_result = get "#{ENV['HOPSWORKS_API']}/project/#{@project.id}/featurestores/#{featurestore_id}/featureview/#{feature_view_name}/version/#{feature_view_version}/preparedstatement" + expect_status_details(200) + + parsed_json = JSON.parse(json_result) + expect(parsed_json["items"].first["preparedStatementParameters"].first["index"]).to eql(1) + expect(parsed_json["items"].first["preparedStatementParameters"].first["name"]).to eql("a_testfeature") + expect(parsed_json["items"].first["queryOnline"]).to eql("SELECT `fg0`.`a_testfeature1` AS `a_testfeature1`\nFROM `#{project_name.downcase}`.`#{fg_name}_1` AS `fg0`\nWHERE 
`fg0`.`a_testfeature` = ?") + expect(parsed_json["items"].second["preparedStatementParameters"].first["index"]).to eql(1) + expect(parsed_json["items"].second["preparedStatementParameters"].first["name"]).to eql("a_testfeature") + expect(parsed_json["items"].second["queryOnline"]).to eql("SELECT `fg0`.`b_testfeature1` AS `b_testfeature1`\nFROM `#{project_name.downcase}`.`#{fg_name_b}_1` AS `fg0`\nWHERE `fg0`.`a_testfeature` = ?") + end + + it "should be able to get a feature view serving vector in correct order and remove feature group with only primary key and label" do + # create first feature group + featurestore_id = get_featurestore_id(@project.id) + project_name = @project.projectname + features = [ + {type: "INT", name: "a_testfeature", primary: true}, + {type: "INT", name: "a_testfeature1"}, + ] + json_result, fg_name = create_cached_featuregroup(@project.id, featurestore_id, features: features, featuregroup_name: "test_fg_a_#{short_random_id}", online:true) + parsed_json = JSON.parse(json_result) + fg_id = parsed_json["id"] + # create second feature group + features = [ + {type: "INT", name: "a_testfeature", primary: true}, + {type: "INT", name: "b_testfeature1"}, + ] + json_result_b, fg_name_b = create_cached_featuregroup(@project.id, featurestore_id, features: features, featuregroup_name: "test_fg_b_#{short_random_id}", online:true) + parsed_json_b = JSON.parse(json_result_b) + fg_id_b = parsed_json_b["id"] + + # create third feature group + features = [ + {type: "INT", name: "a_testfeature", primary: true}, + {type: "INT", name: "c_testfeature1",label: true}, + ] + json_result_c, fg_name_c = create_cached_featuregroup(@project.id, featurestore_id, features: features, featuregroup_name: "test_fg_c_#{short_random_id}", online:true) + parsed_json_c = JSON.parse(json_result_c) + fg_id_c = parsed_json_c["id"] + # create queryDTO object + query = { + leftFeatureGroup: { + id: fg_id + }, + leftFeatures: [{name: 'a_testfeature1'}], + joins: [{ + query: { + 
leftFeatureGroup: { + id: fg_id_c + }, + leftFeatures: [{name: 'c_testfeature1'}] + }, + }, + { + query: { + leftFeatureGroup: { + id: fg_id_b + }, + leftFeatures: [{name: 'b_testfeature1'}] + }, + } + ] + } + + feature_schema = [ + {type: "INT", name: "a_testfeature1", label: false}, + {type: "INT", name: "b_testfeature1", label: false}, + {type: "INT", name: "c_testfeature1", label: true} + ] + + json_result, _ = create_feature_view(@project.id, featurestore_id, query, features: feature_schema) + parsed_json = JSON.parse(json_result) + expect_status_details(201) + + feature_view_name = parsed_json["name"] + feature_view_version = parsed_json["version"] + json_result = get "#{ENV['HOPSWORKS_API']}/project/#{@project.id}/featurestores/#{featurestore_id}/featureview/#{feature_view_name}/version/#{feature_view_version}/preparedstatement?batch=false" + expect_status_details(200) + + parsed_json = JSON.parse(json_result) + expect(parsed_json["items"].length).to eql(2) + expect(parsed_json["items"].first["preparedStatementParameters"].first["index"]).to eql(1) + expect(parsed_json["items"].first["preparedStatementParameters"].first["name"]).to eql("a_testfeature") + expect(parsed_json["items"].first["queryOnline"]).to eql("SELECT `fg0`.`a_testfeature1` AS `a_testfeature1`\nFROM `#{project_name.downcase}`.`#{fg_name}_1` AS `fg0`\nWHERE `fg0`.`a_testfeature` = ?") + expect(parsed_json["items"].second["preparedStatementParameters"].first["index"]).to eql(1) + expect(parsed_json["items"].second["preparedStatementParameters"].first["name"]).to eql("a_testfeature") + expect(parsed_json["items"].second["queryOnline"]).to eql("SELECT `fg0`.`b_testfeature1` AS `b_testfeature1`\nFROM `#{project_name.downcase}`.`#{fg_name_b}_1` AS `fg0`\nWHERE `fg0`.`a_testfeature` = ?") + end + + it "should be able to get a feature view serving vector in correct order and remove feature group with only primary key and label for batch serving" do + # create first feature group + featurestore_id = 
get_featurestore_id(@project.id) + project_name = @project.projectname + features = [ + {type: "INT", name: "a_testfeature", primary: true}, + {type: "INT", name: "a_testfeature1"}, + ] + json_result, fg_name = create_cached_featuregroup(@project.id, featurestore_id, features: features, featuregroup_name: "test_fg_a_#{short_random_id}", online:true) + parsed_json = JSON.parse(json_result) + fg_id = parsed_json["id"] + # create second feature group + features = [ + {type: "INT", name: "a_testfeature", primary: true}, + {type: "INT", name: "d_testfeature1", primary: true}, + {type: "INT", name: "b_testfeature1"}, + ] + json_result_b, fg_name_b = create_cached_featuregroup(@project.id, featurestore_id, features: features, featuregroup_name: "test_fg_b_#{short_random_id}", online:true) + parsed_json_b = JSON.parse(json_result_b) + fg_id_b = parsed_json_b["id"] + + # create third feature group + features = [ + {type: "INT", name: "a_testfeature", primary: true}, + {type: "INT", name: "c_testfeature1",label: true}, + ] + json_result_c, fg_name_c = create_cached_featuregroup(@project.id, featurestore_id, features: features, featuregroup_name: "test_fg_c_#{short_random_id}", online:true) + parsed_json_c = JSON.parse(json_result_c) + fg_id_c = parsed_json_c["id"] + # create queryDTO object + query = { + leftFeatureGroup: { + id: fg_id + }, + leftFeatures: [{name: 'a_testfeature1'}], + joins: [{ + query: { + leftFeatureGroup: { + id: fg_id_c + }, + leftFeatures: [{name: 'c_testfeature1'}] + }, + }, + { + query: { + leftFeatureGroup: { + id: fg_id_b + }, + leftFeatures: [{name: 'b_testfeature1'}] + }, + } + ] + } + + feature_schema = [ + {type: "INT", name: "a_testfeature1", label: false}, + {type: "INT", name: "b_testfeature1", label: false}, + {type: "INT", name: "c_testfeature1", label: true} + ] + + json_result, _ = create_feature_view(@project.id, featurestore_id, query, features: feature_schema) + parsed_json = JSON.parse(json_result) + expect_status_details(201) + + 
feature_view_name = parsed_json["name"] + feature_view_version = parsed_json["version"] + json_result = get "#{ENV['HOPSWORKS_API']}/project/#{@project.id}/featurestores/#{featurestore_id}/featureview/#{feature_view_name}/version/#{feature_view_version}/preparedstatement?batch=true" + expect_status_details(200) + + parsed_json = JSON.parse(json_result) + expect(parsed_json["items"].length).to eql(2) + expect(parsed_json["items"].first["preparedStatementParameters"].first["index"]).to eql(1) + expect(parsed_json["items"].first["preparedStatementParameters"].first["name"]).to eql("a_testfeature") + expect(parsed_json["items"].first["queryOnline"]).to eql("SELECT `fg0`.`a_testfeature1` AS `a_testfeature1`\nFROM `#{project_name.downcase}`.`#{fg_name}_1` AS `fg0`\nWHERE `fg0`.`a_testfeature` IN ?\nORDER BY `fg0`.`a_testfeature`") + expect(parsed_json["items"].second["preparedStatementParameters"].first["index"]).to eql(1) + expect(parsed_json["items"].second["preparedStatementParameters"].first["name"]).to eql("d_testfeature1") + expect(parsed_json["items"].second["queryOnline"]).to eql("SELECT `fg0`.`b_testfeature1` AS `b_testfeature1`\nFROM `#{project_name.downcase}`.`#{fg_name_b}_1` AS `fg0`\nWHERE (`fg0`.`d_testfeature1`, `fg0`.`a_testfeature`) IN ?\nORDER BY `fg0`.`d_testfeature1`, `fg0`.`a_testfeature`") + end + + it "should fail when calling get serving vector from feature view created from offline fg" do + # create first feature group + featurestore_id = get_featurestore_id(@project.id) + project_name = @project.projectname + + features = [ + {type: "INT", name: "a_testfeature", primary: true}, + {type: "INT", name: "a_testfeature1"}, + ] + json_result, fg_name_a = create_cached_featuregroup(@project.id, featurestore_id, features: features, featuregroup_name: "test_fg_a_#{short_random_id}", online:true) + parsed_json = JSON.parse(json_result) + fg_id = parsed_json["id"] + # create second feature group + features = [ + {type: "INT", name: "a_testfeature", 
primary: true}, + {type: "INT", name: "b_testfeature1"}, + ] + json_result_b, fg_name_b = create_cached_featuregroup(@project.id, featurestore_id, features: features, featuregroup_name: "test_fg_b_#{short_random_id}", online:false) + parsed_json_b = JSON.parse(json_result_b) + fg_id_b = parsed_json_b["id"] + # create queryDTO object + query = { + leftFeatureGroup: { + id: fg_id + }, + leftFeatures: [{name: 'a_testfeature1'}], + joins: [{ + query: { + leftFeatureGroup: { + id: fg_id_b + }, + leftFeatures: [{name: 'b_testfeature1'}] + } + } + ] + } + + json_result, _ = create_feature_view(@project.id, featurestore_id, query) + parsed_json = JSON.parse(json_result) + expect_status_details(201) + + feature_view_name = parsed_json["name"] + feature_view_version = parsed_json["version"] + get "#{ENV['HOPSWORKS_API']}/project/#{@project.id}/featurestores/#{featurestore_id}/featureview/#{feature_view_name}/version/#{feature_view_version}/preparedstatement" + expect_status(400) + end + + it "should fail when calling get serving vector if a feature group was deleted" do + # create first feature group + featurestore_id = get_featurestore_id(@project.id) + project_name = @project.projectname + + features = [ + {type: "INT", name: "a_testfeature", primary: true}, + {type: "INT", name: "a_testfeature1"}, + ] + json_result, fg_name_a = create_cached_featuregroup(@project.id, featurestore_id, features: features, featuregroup_name: "test_fg_a_#{short_random_id}", online:true) + parsed_json = JSON.parse(json_result) + fg_id = parsed_json["id"] + # create second feature group + features = [ + {type: "INT", name: "a_testfeature", primary: true}, + {type: "INT", name: "b_testfeature1"}, + ] + json_result_b, fg_name_b = create_cached_featuregroup(@project.id, featurestore_id, features: features, featuregroup_name: "test_fg_b_#{short_random_id}", online:false) + parsed_json_b = JSON.parse(json_result_b) + fg_id_b = parsed_json_b["id"] + # create queryDTO object + query = { + 
leftFeatureGroup: { + id: fg_id + }, + leftFeatures: [{name: 'a_testfeature1'}], + joins: [{ + query: { + leftFeatureGroup: { + id: fg_id_b + }, + leftFeatures: [{name: 'b_testfeature1'}] + } + } + ] + } + + json_result, _ = create_feature_view(@project.id, featurestore_id, query) + parsed_json = JSON.parse(json_result) + expect_status_details(201) + + feature_view_name = parsed_json["name"] + feature_view_version = parsed_json["version"] + + # delete the second feature group + delete_featuregroup_checked(@project.id, featurestore_id, fg_id_b) + + get "#{ENV['HOPSWORKS_API']}/project/#{@project.id}/featurestores/#{featurestore_id}/featureview/#{feature_view_name}/version/#{feature_view_version}/preparedstatement" + expect_status_details(400) + end + + it "should fail when calling get serving vector from feature view created from fg without primary key" do + # create feature group without primary key + featurestore_id = get_featurestore_id(@project.id) + project_name = @project.projectname + features = [ + {type: "INT", name: "a_testfeature"}, + {type: "INT", name: "a_testfeature1"}, + ] + json_result, fg_name = create_cached_featuregroup(@project.id, featurestore_id, features: features, featuregroup_name: "test_fg_a_#{short_random_id}", online:true) + parsed_json = JSON.parse(json_result) + fg_id = parsed_json["id"] + + + # create queryDTO object + query = { + leftFeatureGroup: { + id: fg_id + }, + leftFeatures: [{name: 'a_testfeature1'}], + } + + json_result, _ = create_feature_view(@project.id, featurestore_id, query) + parsed_json = JSON.parse(json_result) + expect_status_details(201) + + feature_view_name = parsed_json["name"] + feature_view_version = parsed_json["version"] + get "#{ENV['HOPSWORKS_API']}/project/#{@project.id}/featurestores/#{featurestore_id}/featureview/#{feature_view_name}/version/#{feature_view_version}/preparedstatement" + expect_status_details(400) + end + end + end + end +end diff --git 
a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/FeaturestoreKeywordBuilder.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/FeaturestoreKeywordBuilder.java index 5f3e5ac33c..9e61de08cc 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/FeaturestoreKeywordBuilder.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/FeaturestoreKeywordBuilder.java @@ -73,7 +73,9 @@ private URI uri(UriInfo uriInfo, Project project, Featurestore featurestore, FeatureView featureView) { return uri(uriInfo, project, featurestore) .path(ResourceRequest.Name.FEATUREVIEW.toString().toLowerCase()) - .path(Integer.toString(featureView.getId())) + .path(featureView.getName()) + .path(ResourceRequest.Name.VERSION.toString().toLowerCase()) + .path(Integer.toString(featureView.getVersion())) .path(ResourceRequest.Name.KEYWORDS.toString().toLowerCase()) .build(); } @@ -94,13 +96,21 @@ public KeywordDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, Projec } public KeywordDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, Project project, - Featuregroup featureGroup, TrainingDataset trainingDataset, List keywords) { + Featuregroup featureGroup, List keywords) { KeywordDTO dto = new KeywordDTO(); - if (featureGroup != null) { - dto.setHref(uri(uriInfo, project, featureGroup.getFeaturestore(), featureGroup)); - } else { - dto.setHref(uri(uriInfo, project, trainingDataset.getFeaturestore(), trainingDataset)); + dto.setHref(uri(uriInfo, project, featureGroup.getFeaturestore(), featureGroup)); + + dto.setExpand(expand(resourceRequest)); + if (dto.isExpand()) { + dto.setKeywords(keywords); } + return dto; + } + + public KeywordDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, Project project, + TrainingDataset trainingDataset, List keywords) { + KeywordDTO dto = new KeywordDTO(); + dto.setHref(uri(uriInfo, project, trainingDataset.getFeaturestore(), trainingDataset)); 
dto.setExpand(expand(resourceRequest)); if (dto.isExpand()) { diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/FeaturestoreKeywordResource.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/FeaturestoreKeywordResource.java index 52e19a36d9..c07989142c 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/FeaturestoreKeywordResource.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/FeaturestoreKeywordResource.java @@ -16,6 +16,7 @@ package io.hops.hopsworks.api.featurestore; +import io.hops.hopsworks.api.featurestore.featureview.FeatureViewController; import io.hops.hopsworks.api.filter.AllowedProjectRoles; import io.hops.hopsworks.api.filter.Audience; import io.hops.hopsworks.api.filter.apiKey.ApiKeyRequired; @@ -30,10 +31,12 @@ import io.hops.hopsworks.jwt.annotation.JWTRequired; import io.hops.hopsworks.persistence.entity.featurestore.Featurestore; import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup; +import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset; import io.hops.hopsworks.persistence.entity.project.Project; import io.hops.hopsworks.persistence.entity.user.Users; import io.hops.hopsworks.persistence.entity.user.security.apiKey.ApiScope; +import io.hops.hopsworks.restutils.RESTCodes; import io.swagger.annotations.Api; import io.swagger.annotations.ApiOperation; @@ -55,6 +58,7 @@ import javax.ws.rs.core.UriInfo; import java.util.Arrays; import java.util.List; +import java.util.logging.Level; @RequestScoped @TransactionAttribute(TransactionAttributeType.NEVER) @@ -66,6 +70,8 @@ public class FeaturestoreKeywordResource { @EJB private TrainingDatasetController trainingDatasetController; @EJB + private FeatureViewController featureViewController; + @EJB private JWTHelper jwtHelper; @Inject private KeywordControllerIface 
keywordControllerIface; @@ -76,6 +82,7 @@ public class FeaturestoreKeywordResource { private Featurestore featurestore; private Featuregroup featuregroup; private TrainingDataset trainingDataset; + private FeatureView featureView; public void setProject(Project project) { this.project = project; @@ -93,6 +100,10 @@ public void setTrainingDatasetId(Integer trainingDatasetId) throws FeaturestoreE this.trainingDataset = trainingDatasetController.getTrainingDatasetById(featurestore, trainingDatasetId); } + public void setFeatureView(String name, Integer version) throws FeaturestoreException { + this.featureView = featureViewController.getByNameVersionAndFeatureStore(name, version, featurestore); + } + @GET @Produces(MediaType.APPLICATION_JSON) @ApiOperation(value = "Get keywords") @@ -102,11 +113,20 @@ public void setTrainingDatasetId(Integer trainingDatasetId) throws FeaturestoreE public Response getKeywords(@Context SecurityContext sc, @Context UriInfo uriInfo) throws FeaturestoreException, MetadataException { Users user = jwtHelper.getUserPrincipal(sc); - List keywords = keywordControllerIface.getAll(project, user, featuregroup, trainingDataset); ResourceRequest resourceRequest = new ResourceRequest(ResourceRequest.Name.KEYWORDS); - KeywordDTO dto = featurestoreKeywordBuilder.build(uriInfo, resourceRequest, project, - featuregroup, trainingDataset, keywords); + List keywords = keywordControllerIface.getAll(project, user, featuregroup, trainingDataset, featureView); + KeywordDTO dto; + if (featuregroup != null) { + dto = featurestoreKeywordBuilder.build(uriInfo, resourceRequest, project, featuregroup, keywords); + } else if (trainingDataset != null) { + dto = featurestoreKeywordBuilder.build(uriInfo, resourceRequest, project, trainingDataset, keywords); + } else if (featureView != null) { + dto = featurestoreKeywordBuilder.build(uriInfo, resourceRequest, project, featureView, keywords); + } else { + throw new 
FeaturestoreException(RESTCodes.FeaturestoreErrorCode.KEYWORD_ERROR, Level.FINE, + "Error building keyword object"); + } return Response.ok().entity(dto).build(); } @@ -120,12 +140,21 @@ public Response getKeywords(@Context SecurityContext sc, @Context UriInfo uriInf public Response replaceKeywords(@Context SecurityContext sc, @Context UriInfo uriInfo, KeywordDTO keywordDTO) throws FeaturestoreException, MetadataException { Users user = jwtHelper.getUserPrincipal(sc); - List updatedKeywords = - keywordControllerIface.replaceKeywords(project, user, featuregroup, trainingDataset, keywordDTO.getKeywords()); ResourceRequest resourceRequest = new ResourceRequest(ResourceRequest.Name.KEYWORDS); - KeywordDTO dto = featurestoreKeywordBuilder.build(uriInfo, resourceRequest, project, - featuregroup, trainingDataset, updatedKeywords); + List updatedKeywords = keywordControllerIface.replaceKeywords(project, user, featuregroup, trainingDataset, + featureView, keywordDTO.getKeywords()); + KeywordDTO dto; + if (featuregroup != null) { + dto = featurestoreKeywordBuilder.build(uriInfo, resourceRequest, project, featuregroup, updatedKeywords); + } else if (trainingDataset != null) { + dto = featurestoreKeywordBuilder.build(uriInfo, resourceRequest, project, trainingDataset, updatedKeywords); + } else if (featureView != null) { + dto = featurestoreKeywordBuilder.build(uriInfo, resourceRequest, project, featureView, updatedKeywords); + } else { + throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.KEYWORD_ERROR, Level.FINE, + "Error building keyword object"); + } return Response.ok().entity(dto).build(); } @@ -138,12 +167,21 @@ public Response deleteKeywords(@Context SecurityContext sc, @Context UriInfo uri @QueryParam("keyword") String keyword) throws FeaturestoreException, MetadataException { Users user = jwtHelper.getUserPrincipal(sc); - List updatedKeywords = - keywordControllerIface.deleteKeywords(project, user, featuregroup, trainingDataset, Arrays.asList(keyword)); 
ResourceRequest resourceRequest = new ResourceRequest(ResourceRequest.Name.KEYWORDS); - KeywordDTO dto = featurestoreKeywordBuilder.build(uriInfo, resourceRequest, project, - featuregroup, trainingDataset, updatedKeywords); + List updatedKeywords = keywordControllerIface.deleteKeywords(project, user, featuregroup, trainingDataset, + featureView, Arrays.asList(keyword)); + KeywordDTO dto; + if (featuregroup != null) { + dto = featurestoreKeywordBuilder.build(uriInfo, resourceRequest, project, featuregroup, updatedKeywords); + } else if (trainingDataset != null) { + dto = featurestoreKeywordBuilder.build(uriInfo, resourceRequest, project, trainingDataset, updatedKeywords); + } else if (featureView != null) { + dto = featurestoreKeywordBuilder.build(uriInfo, resourceRequest, project, featureView, updatedKeywords); + } else { + throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.KEYWORD_ERROR, Level.FINE, + "Error building keyword object"); + } return Response.ok().entity(dto).build(); } diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/FsQueryBuilder.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/FsQueryBuilder.java index fa1f0d87b1..c8c970efa2 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/FsQueryBuilder.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/FsQueryBuilder.java @@ -84,10 +84,18 @@ public FsQueryDTO build(UriInfo uriInfo, Project project, Users user, Featuresto public FsQueryDTO build(UriInfo uriInfo, Project project, Users user, FeatureView featureView) throws FeaturestoreException, ServiceException { - Query query = featureViewController.makeQuery(featureView, project, user); + Query query = queryController.makeQuery(featureView, project, user, false, false); FsQueryDTO dto = constructorController.construct(query, pitJoinController.isPitEnabled(query), true, project, user); dto.setHref(uri(uriInfo, project)); return dto; } + + public 
FsQueryDTO build(UriInfo uriInfo, Project project, Users user, Query query, Boolean isTrainingDataset) + throws FeaturestoreException, ServiceException { + FsQueryDTO dto = constructorController.construct(query, pitJoinController.isPitEnabled(query), + isTrainingDataset, project, user); + dto.setHref(uri(uriInfo, project)); + return dto; + } } diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/activities/ActivityBuilder.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/activities/ActivityBuilder.java index 6226a75385..88e507c545 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/activities/ActivityBuilder.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/activities/ActivityBuilder.java @@ -29,6 +29,7 @@ import io.hops.hopsworks.persistence.entity.featurestore.activity.ActivityType; import io.hops.hopsworks.persistence.entity.featurestore.activity.FeaturestoreActivity; import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup; +import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset; import io.hops.hopsworks.persistence.entity.project.Project; import io.hops.hopsworks.persistence.entity.user.Users; @@ -84,6 +85,17 @@ private URI uri(UriInfo uriInfo, Project project, .build(); } + private URI uri(UriInfo uriInfo, Project project, + Featurestore featurestore, FeatureView featureView) { + return uri(uriInfo, project, featurestore) + .path(ResourceRequest.Name.FEATUREVIEW.toString().toLowerCase()) + .path(featureView.getName()) + .path(ResourceRequest.Name.VERSION.toString().toLowerCase()) + .path(Integer.toString(featureView.getVersion())) + .path(ResourceRequest.Name.ACTIVITIES.toString().toLowerCase()) + .build(); + } + private boolean expand(ResourceRequest resourceRequest) { return resourceRequest != null && 
resourceRequest.contains(ResourceRequest.Name.ACTIVITIES); } @@ -154,6 +166,36 @@ public ActivityDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, Proje return dto; } + public ActivityDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, Project project, Users user, + FeatureView featureView, FeaturestoreActivity featurestoreActivity) + throws FeaturestoreException { + ActivityDTO dto = new ActivityDTO(); + dto.setHref(uri(uriInfo, project, featureView.getFeaturestore(), featureView)); + dto.setExpand(expand(resourceRequest)); + if (dto.isExpand()) { + dto.setType(featurestoreActivity.getType()); + dto.setUser(usersBuilder.build(uriInfo, resourceRequest, featurestoreActivity.getUser())); + dto.setTimestamp(featurestoreActivity.getEventTime().getTime()); + + if (featurestoreActivity.getType() == ActivityType.JOB) { + dto.setJob(jobsBuilder.build(uriInfo, resourceRequest, + featurestoreActivity.getExecution().getJob(), featurestoreActivity.getExecution())); + } else if (featurestoreActivity.getType() == ActivityType.STATISTICS) { + dto.setStatistics(statisticsBuilder.build(uriInfo, resourceRequest, project, user, + featureView, featurestoreActivity.getStatistics())); + } else { + // Metadata change + String metadataMsg = featurestoreActivity.getActivityMeta().getValue(); + if (featurestoreActivity.getActivityMetaMsg() != null) { + metadataMsg += " " + featurestoreActivity.getActivityMetaMsg(); + } + dto.setMetadata(metadataMsg); + } + } + + return dto; + } + public ActivityDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, Project project, Users user, Featuregroup featuregroup) throws FeaturestoreException { ActivityDTO dto = new ActivityDTO(); @@ -197,4 +239,26 @@ public ActivityDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, return dto; } + + public ActivityDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, + Project project, Users user, FeatureView featureView) throws FeaturestoreException { + ActivityDTO dto = new 
ActivityDTO(); + dto.setHref(uri(uriInfo, project, featureView.getFeaturestore(), featureView)); + dto.setExpand(expand(resourceRequest)); + if (dto.isExpand()) { + AbstractFacade.CollectionInfo collectionInfo = + activityFacade.findByFeatureView(featureView, + resourceRequest.getOffset(), + resourceRequest.getLimit(), + resourceRequest.getFilter(), + resourceRequest.getSort()); + dto.setCount(collectionInfo.getCount()); + + for (FeaturestoreActivity featurestoreActivity : collectionInfo.getItems()) { + dto.addItem(build(uriInfo, resourceRequest, project, user, featureView, featurestoreActivity)); + } + } + + return dto; + } } diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/activities/ActivityResource.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/activities/ActivityResource.java index e1d7f57f01..c68eeaa930 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/activities/ActivityResource.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/activities/ActivityResource.java @@ -16,6 +16,7 @@ package io.hops.hopsworks.api.featurestore.activities; +import io.hops.hopsworks.api.featurestore.featureview.FeatureViewController; import io.hops.hopsworks.api.filter.AllowedProjectRoles; import io.hops.hopsworks.api.filter.Audience; import io.hops.hopsworks.api.filter.apiKey.ApiKeyRequired; @@ -28,6 +29,7 @@ import io.hops.hopsworks.jwt.annotation.JWTRequired; import io.hops.hopsworks.persistence.entity.featurestore.Featurestore; import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup; +import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset; import io.hops.hopsworks.persistence.entity.project.Project; import io.hops.hopsworks.persistence.entity.user.Users; @@ -58,6 +60,8 @@ public class ActivityResource { @EJB private TrainingDatasetController 
trainingDatasetController; @EJB + private FeatureViewController featureViewController; + @EJB private ActivityBuilder activityBuilder; @EJB private JWTHelper jwtHelper; @@ -66,6 +70,7 @@ public class ActivityResource { private Featurestore featurestore; private Featuregroup featuregroup; private TrainingDataset trainingDataset; + private FeatureView featureView; public void setProject(Project project) { this.project = project; @@ -83,6 +88,10 @@ public void setTrainingDatasetId(Integer trainingDatasetId) throws FeaturestoreE this.trainingDataset = trainingDatasetController.getTrainingDatasetById(featurestore, trainingDatasetId); } + public void setFeatureView(String name, Integer version) throws FeaturestoreException { + this.featureView = featureViewController.getByNameVersionAndFeatureStore(name, version, featurestore); + } + @GET @Produces(MediaType.APPLICATION_JSON) @ApiOperation(value = "Get all available statistics") @@ -106,8 +115,10 @@ public Response get(@BeanParam Pagination pagination, ActivityDTO dto = null; if (featuregroup != null) { dto = activityBuilder.build(uriInfo, resourceRequest, project, user, featuregroup); - } else { + } else if (trainingDataset != null) { dto = activityBuilder.build(uriInfo, resourceRequest, project, user, trainingDataset); + } else if (featureView != null) { + dto = activityBuilder.build(uriInfo, resourceRequest, project, user, featureView); } return Response.ok().entity(dto).build(); diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewBuilder.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewBuilder.java index 562dddc60d..300ba9352a 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewBuilder.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewBuilder.java @@ -22,6 +22,7 @@ import io.hops.hopsworks.api.tags.TagBuilder; import 
io.hops.hopsworks.common.api.ResourceRequest; import io.hops.hopsworks.common.dao.user.UserDTO; +import io.hops.hopsworks.common.dataset.util.DatasetHelper; import io.hops.hopsworks.common.dataset.util.DatasetPath; import io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO; import io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO; @@ -29,6 +30,9 @@ import io.hops.hopsworks.common.featurestore.keyword.KeywordControllerIface; import io.hops.hopsworks.common.featurestore.keyword.KeywordDTO; import io.hops.hopsworks.common.featurestore.featureview.FeatureViewDTO; +import io.hops.hopsworks.common.featurestore.query.Query; +import io.hops.hopsworks.common.featurestore.query.QueryBuilder; +import io.hops.hopsworks.common.featurestore.query.QueryController; import io.hops.hopsworks.common.featurestore.trainingdatasets.TrainingDatasetController; import io.hops.hopsworks.common.hdfs.Utils; import io.hops.hopsworks.exceptions.DatasetException; @@ -36,6 +40,7 @@ import io.hops.hopsworks.exceptions.MetadataException; import io.hops.hopsworks.exceptions.SchematizedTagException; import io.hops.hopsworks.exceptions.ServiceException; +import io.hops.hopsworks.persistence.entity.dataset.DatasetType; import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature; import io.hops.hopsworks.persistence.entity.project.Project; @@ -47,7 +52,7 @@ import javax.ejb.TransactionAttributeType; import javax.inject.Inject; import javax.ws.rs.core.UriInfo; -import java.io.IOException; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -61,7 +66,11 @@ public class FeatureViewBuilder { @EJB private TrainingDatasetController trainingDatasetController; @EJB - private FsQueryBuilder queryBuilder; + private FsQueryBuilder fsQueryBuilder; + @EJB + private QueryBuilder queryBuilder; + @EJB + 
private QueryController queryController; @EJB private OnlineFeaturegroupController onlineFeaturegroupController; @Inject @@ -70,14 +79,22 @@ public class FeatureViewBuilder { private FeaturestoreKeywordBuilder featurestoreKeywordBuilder; @EJB private TagBuilder tagsBuilder; + @EJB + private DatasetHelper datasetHelper; public FeatureViewBuilder() { } + public FeatureViewDTO build(FeatureView featureView, ResourceRequest resourceRequest, Project project, + Users user, UriInfo uriInfo) + throws FeaturestoreException, ServiceException, MetadataException, DatasetException, SchematizedTagException { + List featureViews = Arrays.asList(featureView); + return build(featureViews, resourceRequest, project, user, uriInfo); + } + public FeatureViewDTO build(List featureViews, ResourceRequest resourceRequest, Project project, Users user, UriInfo uriInfo) - throws FeaturestoreException, ServiceException, IOException, MetadataException, DatasetException, - SchematizedTagException { + throws FeaturestoreException, ServiceException, MetadataException, DatasetException, SchematizedTagException { FeatureViewDTO featureViewDTO = new FeatureViewDTO(); featureViewDTO.setHref(uriInfo.getRequestUri()); @@ -100,29 +117,31 @@ public FeatureViewDTO build(List featureViews, ResourceRequest reso } private FeatureViewDTO buildSingle(FeatureView featureView, ResourceRequest resourceRequest, Project project, - Users user, UriInfo uriInfo) throws FeaturestoreException, ServiceException, IOException, MetadataException, - DatasetException, SchematizedTagException { + Users user, UriInfo uriInfo) + throws FeaturestoreException, ServiceException, MetadataException, DatasetException, SchematizedTagException { FeatureViewDTO base = convertToDTO(featureView); if (resourceRequest != null) { - if (resourceRequest.contains(ResourceRequest.Name.QUERY)) { + if (resourceRequest.contains(ResourceRequest.Name.QUERY_STRING)) { // For the overview page of UI - base.setQueryString(queryBuilder.build(uriInfo, 
project, user, featureView)); + base.setQueryString(fsQueryBuilder.build(uriInfo, project, user, featureView)); + } + if (resourceRequest.contains(ResourceRequest.Name.QUERY)) { + Query query = queryController.makeQuery(featureView, project, user, true, false); + base.setQuery(queryBuilder.build(query, featureView.getFeaturestore(), project, user)); } if (resourceRequest.contains(ResourceRequest.Name.FEATURES)) { base.setFeatures(makeFeatures(featureView, project)); } if (resourceRequest.contains(ResourceRequest.Name.KEYWORDS)) { - // TODO feature view: revisit after implementation of keyword endpoint - List keywords = keywordControllerIface.getAll(project, user, featureView); + List keywords = keywordControllerIface.getAll(project, user, null, null, featureView); ResourceRequest keywordResourceRequest = new ResourceRequest(ResourceRequest.Name.KEYWORDS); KeywordDTO dto = featurestoreKeywordBuilder.build(uriInfo, keywordResourceRequest, project, featureView, keywords); base.setKeywords(dto); } - //TODO feature view: revisit after implementation of tag endpoint - //TODO add correct feature view path - DatasetPath path = null; + DatasetPath path = datasetHelper.getDatasetPath(project, featureViewController.getLocation(featureView), + DatasetType.DATASET); FeatureStoreTagUri tagUri = new FeatureStoreTagUri(uriInfo, featureView.getFeaturestore().getId(), ResourceRequest.Name.FEATUREVIEW, featureView.getId()); base.setTags(tagsBuilder.build(tagUri, resourceRequest, user, path)); @@ -132,6 +151,7 @@ private FeatureViewDTO buildSingle(FeatureView featureView, ResourceRequest reso public FeatureViewDTO convertToDTO(FeatureView featureView) { FeatureViewDTO featureViewDTO = new FeatureViewDTO(); + featureViewDTO.setId(featureView.getId()); featureViewDTO.setFeaturestoreId(featureView.getFeaturestore().getId()); featureViewDTO.setFeaturestoreName(featureView.getFeaturestore().getProject().getName()); featureViewDTO.setDescription(featureView.getDescription()); @@ -140,7 
+160,6 @@ public FeatureViewDTO convertToDTO(FeatureView featureView) { featureViewDTO.setVersion(featureView.getVersion()); featureViewDTO.setName(featureView.getName()); featureViewDTO.setId(featureView.getId()); - featureViewDTO.setLabel(featureView.getLabel()); return featureViewDTO; } diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewController.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewController.java index a2e36c7449..a125513931 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewController.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewController.java @@ -16,45 +16,54 @@ package io.hops.hopsworks.api.featurestore.featureview; -import io.hops.hopsworks.common.api.ResourceRequest; -import io.hops.hopsworks.common.dao.AbstractFacade; import io.hops.hopsworks.common.dao.QueryParam; +import io.hops.hopsworks.common.dao.user.activity.ActivityFacade; +import io.hops.hopsworks.common.featurestore.activity.FeaturestoreActivityFacade; import io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO; -import io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController; import io.hops.hopsworks.common.featurestore.featureview.FeatureViewDTO; import io.hops.hopsworks.common.featurestore.featureview.FeatureViewFacade; -import io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController; -import io.hops.hopsworks.common.featurestore.query.Feature; import io.hops.hopsworks.common.featurestore.query.Query; import io.hops.hopsworks.common.featurestore.query.QueryController; import io.hops.hopsworks.common.featurestore.query.QueryDTO; -import io.hops.hopsworks.common.featurestore.query.join.Join; import io.hops.hopsworks.common.featurestore.query.pit.PitJoinController; +import 
io.hops.hopsworks.common.featurestore.storageconnectors.FeaturestoreConnectorFacade; import io.hops.hopsworks.common.featurestore.trainingdatasets.TrainingDatasetController; -import io.hops.hopsworks.common.featurestore.transformationFunction.TransformationFunctionFacade; +import io.hops.hopsworks.common.featurestore.trainingdatasets.TrainingDatasetFacade; import io.hops.hopsworks.common.featurestore.utils.FeaturestoreUtils; +import io.hops.hopsworks.common.hdfs.DistributedFileSystemOps; +import io.hops.hopsworks.common.hdfs.DistributedFsService; +import io.hops.hopsworks.common.hdfs.HdfsUsersController; +import io.hops.hopsworks.common.hdfs.inode.InodeController; +import io.hops.hopsworks.common.provenance.core.HopsFSProvenanceController; +import io.hops.hopsworks.common.util.Settings; import io.hops.hopsworks.exceptions.FeaturestoreException; +import io.hops.hopsworks.exceptions.ProvenanceException; +import io.hops.hopsworks.persistence.entity.dataset.Dataset; import io.hops.hopsworks.persistence.entity.featurestore.Featurestore; +import io.hops.hopsworks.persistence.entity.featurestore.activity.FeaturestoreActivityMeta; import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; +import io.hops.hopsworks.persistence.entity.featurestore.storageconnector.FeaturestoreConnector; +import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset; import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature; +import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilter; import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin; +import io.hops.hopsworks.persistence.entity.hdfs.inode.Inode; import io.hops.hopsworks.persistence.entity.project.Project; import io.hops.hopsworks.persistence.entity.user.Users; +import io.hops.hopsworks.persistence.entity.user.activity.ActivityFlag; import io.hops.hopsworks.restutils.RESTCodes; 
+import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; import javax.ejb.EJB; import javax.ejb.Stateless; import javax.ejb.TransactionAttribute; import javax.ejb.TransactionAttributeType; import javax.inject.Inject; -import java.util.ArrayList; +import java.io.IOException; import java.util.Collection; -import java.util.Comparator; import java.util.Date; -import java.util.HashMap; import java.util.List; -import java.util.Map; -import java.util.Set; import java.util.logging.Level; import java.util.stream.Collectors; @@ -63,28 +72,41 @@ @Stateless @TransactionAttribute(TransactionAttributeType.NEVER) public class FeatureViewController { + private static final String PATH_TO_FEATURE_VIEW = "%s" + Path.SEPARATOR + ".featureviews" + Path.SEPARATOR + "%s_%d"; @EJB private FeatureViewFacade featureViewFacade; @EJB private QueryController queryController; + @EJB + private InodeController inodeController; @Inject private PitJoinController pitJoinController; @EJB - private TransformationFunctionFacade transformationFunctionFacade; + private DistributedFsService dfs; @EJB - private TrainingDatasetController trainingDatasetController; + private HdfsUsersController hdfsUsersBean; @EJB - private FeaturegroupController featuregroupController; + private TrainingDatasetController trainingDatasetController; @EJB - private OnlineFeaturestoreController onlineFeaturestoreController; + private FeaturestoreConnectorFacade featurestoreConnectorFacade; @EJB private FeatureViewInputValidator featureViewInputValidator; @EJB + private HopsFSProvenanceController fsProvenanceController; + @EJB + private FeaturestoreActivityFacade fsActivityFacade; + @EJB + private FeatureViewBuilder featureViewBuilder; + @EJB private FeaturestoreUtils featurestoreUtils; + @EJB + private TrainingDatasetFacade trainingDatasetFacade; + @EJB + private ActivityFacade activityFacade; - public FeatureView createFeatureView(FeatureView featureView, Featurestore featurestore) - throws 
FeaturestoreException { + public FeatureView createFeatureView(Project project, Users user, FeatureView featureView, Featurestore featurestore) + throws FeaturestoreException, ProvenanceException, IOException { // if version not provided, get latest and increment if (featureView.getVersion() == null) { // returns ordered list by desc version @@ -104,27 +126,84 @@ public FeatureView createFeatureView(FeatureView featureView, Featurestore featu "Feature view: " + featureView.getName() + ", version: " + featureView.getVersion()); } - featureView = featureViewFacade.update(featureView); - return featureView; + // Since training dataset created by feature view shares the same name, need to make sure name of feature view + // do not collide with existing training dataset created without feature view. + List trainingDatasets = trainingDatasetFacade + .findByNameAndFeaturestoreExcludeFeatureView(featureView.getName(), featurestore); + if (trainingDatasets != null && !trainingDatasets.isEmpty()) { + throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURE_VIEW_ALREADY_EXISTS, Level.FINE, + "Name of the feature view collides with an existing training dataset name : " + featureView.getName()); + } + + String connectorName = + featurestore.getProject().getName() + "_" + Settings.ServiceDataset.TRAININGDATASETS.getName(); + FeaturestoreConnector featurestoreConnector = + featurestoreConnectorFacade.findByFeaturestoreName(featurestore, connectorName) + .orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.HOPSFS_CONNECTOR_NOT_FOUND, + Level.FINE, "HOPSFS Connector: " + connectorName)); + + Dataset datasetsFolder = featurestoreConnector.getHopsfsConnector().getHopsfsDataset(); + + DistributedFileSystemOps udfso = null; + String username = hdfsUsersBean.getHdfsUserName(project, user); + try { + Path path = new Path(String.format(PATH_TO_FEATURE_VIEW, inodeController.getPath(datasetsFolder.getInode()), + featureView.getName(), 
featureView.getVersion())); + + udfso = dfs.getDfsOps(username); + udfso.mkdirs(path, FsPermission.getDefault()); + + Inode inode = inodeController.getInodeAtPath(path.toString()); + + featureView.setInode(inode); + featureView = featureViewFacade.update(featureView); + + // Log the metadata operation + fsActivityFacade.logMetadataActivity(user, featureView, FeaturestoreActivityMeta.FV_CREATED); + + activityFacade.persistActivity(ActivityFacade.CREATED_FEATURE_VIEW + featureView.getName(), project, user, + ActivityFlag.SERVICE); + + fsProvenanceController.featureViewAttachXAttr(path.toString(), featureView, udfso); + return featureView; + } finally { + if (udfso != null) { + dfs.closeDfsClient(udfso); + } + } + } + + public String getLocation(FeatureView featureView) { + return inodeController.getPath(featureView.getInode()); } public List getAll() { return featureViewFacade.findAll(); } - public List getByFeatureStore(Featurestore featurestore, ResourceRequest resourceRequest) { - return featureViewFacade.findByFeaturestore(featurestore, convertToQueryParam(resourceRequest)); + public List getByFeatureStore(Featurestore featurestore, QueryParam queryParam) { + return featureViewFacade.findByFeaturestore(featurestore, queryParam); } - public List getByNameAndFeatureStore(String name, Featurestore featurestore, - ResourceRequest resourceRequest) { - return featureViewFacade.findByNameAndFeaturestore(name, featurestore, convertToQueryParam(resourceRequest)); + public List getByNameAndFeatureStore(String name, Featurestore featurestore, QueryParam queryParam) + throws FeaturestoreException { + List featureViews = featureViewFacade.findByNameAndFeaturestore( + name, featurestore, queryParam); + if (featureViews.isEmpty()) { + throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURE_VIEW_NOT_FOUND, + Level.FINE, String.format("There exists no feature view with the name %s.", name)); + } + return featureViews; } - public List 
getByNameVersionAndFeatureStore(String name, Integer version, Featurestore featurestore, - ResourceRequest resourceRequest) { - return featureViewFacade.findByNameVersionAndFeaturestore(name, version, featurestore, - convertToQueryParam(resourceRequest)); + public FeatureView getByNameVersionAndFeatureStore(String name, Integer version, Featurestore featurestore) + throws FeaturestoreException { + List featureViews = featureViewFacade.findByNameVersionAndFeaturestore(name, version, featurestore); + if (featureViews.isEmpty()) { + throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURE_VIEW_NOT_FOUND, + Level.FINE, String.format("There exists no feature view with the name %s and version %d.", name, version)); + } + return featureViews.get(0); } public void delete(Users user, Project project, Featurestore featurestore, String name) @@ -150,16 +229,27 @@ private void delete(Users user, Project project, Featurestore featurestore, List } for (FeatureView fv: featureViews) { featureViewFacade.remove(fv); + activityFacade.persistActivity(ActivityFacade.DELETED_FEATURE_VIEW + fv.getName(), + project, user, ActivityFlag.SERVICE); } } - private QueryParam convertToQueryParam(ResourceRequest resourceRequest) { - return new QueryParam( - resourceRequest.getOffset(), - resourceRequest.getLimit(), - (Set) resourceRequest.getFilter(), - (Set) resourceRequest.getSort() - ); + public FeatureView update(Users user, Project project, Featurestore featurestore, FeatureViewDTO featureViewDTO) + throws FeaturestoreException { + FeatureView featureView = getByNameVersionAndFeatureStore(featureViewDTO.getName(), featureViewDTO.getVersion(), + featurestore); + + featurestoreUtils.verifyUserRole(featureView, featurestore, user, project); + + // Update metadata + featureView.setDescription(featureViewDTO.getDescription()); + featureViewFacade.update(featureView); + + activityFacade.persistActivity(ActivityFacade.EDITED_FEATURE_VIEW + featureViewDTO.getName(), project, user, + 
ActivityFlag.SERVICE); + + // Refetch the updated entry from the database + return getByNameVersionAndFeatureStore(featureViewDTO.getName(), featureViewDTO.getVersion(), featurestore); } public FeatureView convertFromDTO(Project project, Featurestore featurestore, Users user, @@ -176,85 +266,6 @@ public FeatureView convertFromDTO(Project project, Featurestore featurestore, Us return featureView; } - public Query makeQuery(FeatureView featureView, Project project, Users user) throws FeaturestoreException { - - List joins = featureView.getJoins().stream() - .sorted(Comparator.comparing(TrainingDatasetJoin::getIndex)) - .collect(Collectors.toList()); - - Map fgAliasLookup = trainingDatasetController.getAliasLookupTable(joins); - - List tdFeatures = featureView.getFeatures().stream() - .sorted((t1, t2) -> { - if (t1.getIndex() != null) { - // compare based on index - return t1.getIndex().compareTo(t2.getIndex()); - } else { - // Old training dataset with no index. compare based on name - return t1.getName().compareTo(t2.getName()); - } - }) - // drop label features if desired - .collect(Collectors.toList()); - - // Check that all the feature groups still exists, if not throw a reasonable error - if (tdFeatures.stream().anyMatch(j -> j.getFeatureGroup() == null)) { - throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_QUERY_FG_DELETED, Level.FINE); - } - - // Get available features for all involved feature groups once, and save in map fgId -> availableFeatures - Map> availableFeaturesLookup = new HashMap<>(); - for (TrainingDatasetJoin join : joins) { - if (!availableFeaturesLookup.containsKey(join.getFeatureGroup().getId())) { - List availableFeatures = featuregroupController.getFeatures(join.getFeatureGroup(), project, user) - .stream() - .map(f -> new Feature(f.getName(), - fgAliasLookup.get(join.getId()), f.getType(), f.getPrimary(), f.getDefaultValue(), join.getPrefix())) - .collect(Collectors.toList()); - 
availableFeaturesLookup.put(join.getFeatureGroup().getId(), availableFeatures); - } - } - - List features = new ArrayList<>(); - for (TrainingDatasetFeature requestedFeature : tdFeatures) { - features.add(availableFeaturesLookup.get(requestedFeature.getFeatureGroup().getId()) - .stream() - .filter(af -> af.getName().equals(requestedFeature.getName())) - // instantiate new feature since alias in available feature is not correct if fg is joined with itself - .map(af -> new Feature(af.getName(), fgAliasLookup.get(requestedFeature.getTrainingDatasetJoin().getId()), - af.getType(), af.getDefaultValue(), af.getPrefix(), requestedFeature.getFeatureGroup(), - requestedFeature.getIndex())) - .findFirst() - .orElseThrow( - () -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURE_DOES_NOT_EXIST, Level.FINE, - "Feature: " + requestedFeature.getName() + " not found in feature group: " + - requestedFeature.getFeatureGroup().getName()))); - } - - // Keep a map feature store id -> feature store name - Map fsLookup = trainingDatasetController.getFsLookupTableJoins(joins); - - Query query = new Query( - fsLookup.get(joins.get(0).getFeatureGroup().getFeaturestore().getId()), - onlineFeaturestoreController - .getOnlineFeaturestoreDbName(joins.get(0).getFeatureGroup().getFeaturestore().getProject()), - joins.get(0).getFeatureGroup(), - fgAliasLookup.get(joins.get(0).getId()), - features, - availableFeaturesLookup.get(joins.get(0).getFeatureGroup().getId()), - false); - - // Set the remaining feature groups as join - List queryJoins = new ArrayList<>(); - for (int i = 1; i < joins.size(); i++) { - // left side of the join stays fixed, the counter starts at 1 - queryJoins.add(trainingDatasetController.getQueryJoin(query, joins.get(i), fgAliasLookup, fsLookup, - availableFeaturesLookup, false)); - } - query.setJoins(queryJoins); - return query; - } - public List getFeaturesSorted(Collection features) { return features.stream() .sorted((t1, t2) -> { @@ -280,6 +291,9 @@ 
private void setQuery(Project project, Users user, QueryDTO queryDTO, FeatureVie List features = trainingDatasetController.collectFeatures(query, featureDTOs, null, featureView, 0, tdJoins, 0); featureView.setFeatures(features); + List filters = trainingDatasetController.convertToFilterEntities(query.getFilter(), + featureView, "L"); + featureView.setFilters(filters); } } diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewResource.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewResource.java index cb4ab3d097..2f8c4ee2a3 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewResource.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewResource.java @@ -16,16 +16,18 @@ package io.hops.hopsworks.api.featurestore.featureview; +import com.google.api.client.util.Sets; import io.hops.hopsworks.api.filter.AllowedProjectRoles; import io.hops.hopsworks.api.filter.Audience; import io.hops.hopsworks.api.filter.apiKey.ApiKeyRequired; import io.hops.hopsworks.api.jwt.JWTHelper; import io.hops.hopsworks.common.api.ResourceRequest; -import io.hops.hopsworks.common.dao.user.activity.ActivityFacade; +import io.hops.hopsworks.common.dao.QueryParam; import io.hops.hopsworks.common.featurestore.featureview.FeatureViewDTO; import io.hops.hopsworks.exceptions.DatasetException; import io.hops.hopsworks.exceptions.FeaturestoreException; import io.hops.hopsworks.exceptions.MetadataException; +import io.hops.hopsworks.exceptions.ProvenanceException; import io.hops.hopsworks.exceptions.SchematizedTagException; import io.hops.hopsworks.exceptions.ServiceException; import io.hops.hopsworks.jwt.annotation.JWTRequired; @@ -33,7 +35,6 @@ import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; import io.hops.hopsworks.persistence.entity.project.Project; import 
io.hops.hopsworks.persistence.entity.user.Users; -import io.hops.hopsworks.persistence.entity.user.activity.ActivityFlag; import io.hops.hopsworks.persistence.entity.user.security.apiKey.ApiScope; import io.swagger.annotations.Api; import io.swagger.annotations.ApiOperation; @@ -49,6 +50,7 @@ import javax.ws.rs.DELETE; import javax.ws.rs.GET; import javax.ws.rs.POST; +import javax.ws.rs.PUT; import javax.ws.rs.Path; import javax.ws.rs.PathParam; import javax.ws.rs.Produces; @@ -58,7 +60,9 @@ import javax.ws.rs.core.SecurityContext; import javax.ws.rs.core.UriInfo; import java.io.IOException; +import java.util.HashSet; import java.util.List; +import java.util.Set; @RequestScoped @Api(value = "Feature View Resource") @@ -71,8 +75,7 @@ public class FeatureViewResource { private FeatureViewController featureViewController; @EJB private FeatureViewBuilder featureViewBuilder; - @EJB - private ActivityFacade activityFacade; + private Project project; private Featurestore featurestore; @@ -91,13 +94,26 @@ public Response create( SecurityContext sc, @Context HttpServletRequest req, - FeatureViewDTO featureViewDTO) throws FeaturestoreException { + @Context + UriInfo uriInfo, + FeatureViewDTO featureViewDTO) throws FeaturestoreException, ProvenanceException, ServiceException, IOException, + SchematizedTagException, MetadataException, DatasetException { + if (featureViewDTO == null) { + throw new IllegalArgumentException("Input JSON for creating a new Feature View cannot be null"); + } Users user = jWTHelper.getUserPrincipal(sc); + FeatureView featureView = featureViewController.convertFromDTO(project, featurestore, user, featureViewDTO); - featureView = featureViewController.createFeatureView(featureView, featurestore); - activityFacade.persistActivity(ActivityFacade.CREATED_FEATURE_VIEW + - featureView.getName(), project, user, ActivityFlag.SERVICE); - return Response.ok().entity(featureViewBuilder.convertToDTO(featureView)).build(); + featureView = 
featureViewController.createFeatureView(project, user, featureView, featurestore); + + ResourceRequest resourceRequest = new ResourceRequest(ResourceRequest.Name.FEATUREVIEW); + Set expansions = Sets.newHashSet(); + expansions.add(new ResourceRequest(ResourceRequest.Name.QUERY)); + expansions.add(new ResourceRequest(ResourceRequest.Name.FEATURES)); + resourceRequest.setExpansions(expansions); + featureViewDTO = featureViewBuilder.build(featureView, resourceRequest, project, user, uriInfo); + + return Response.created(featureViewDTO.getHref()).entity(featureViewDTO).build(); } @GET @@ -119,7 +135,9 @@ public Response getAll( SchematizedTagException { Users user = jWTHelper.getUserPrincipal(sc); ResourceRequest resourceRequest = makeResourceRequest(param); - List featureViews = featureViewController.getByFeatureStore(featurestore, resourceRequest); + List featureViews = featureViewController.getByFeatureStore(featurestore, + convertToQueryParam(resourceRequest)); + return Response.ok() .entity(featureViewBuilder.build(featureViews, resourceRequest, project, user, uriInfo)) .build(); @@ -149,7 +167,8 @@ public Response getByName( Users user = jWTHelper.getUserPrincipal(sc); ResourceRequest resourceRequest = makeResourceRequest(param); List featureViews = featureViewController.getByNameAndFeatureStore(name, featurestore, - resourceRequest); + convertToQueryParam(resourceRequest)); + return Response.ok() .entity(featureViewBuilder.build(featureViews, resourceRequest, project, user, uriInfo)) .build(); @@ -176,28 +195,17 @@ public Response getByNameVersion( String name, @PathParam("version") Integer version - ) throws FeaturestoreException, ServiceException, IOException, MetadataException, DatasetException, + ) throws FeaturestoreException, ServiceException, MetadataException, DatasetException, SchematizedTagException { Users user = jWTHelper.getUserPrincipal(sc); ResourceRequest resourceRequest = makeResourceRequest(param); - List featureViews = - 
featureViewController.getByNameVersionAndFeatureStore(name, version, featurestore, - resourceRequest); + FeatureView featureView = featureViewController.getByNameVersionAndFeatureStore(name, version, featurestore); + return Response.ok() - .entity(featureViewBuilder.build(featureViews, resourceRequest, project, user, uriInfo)) + .entity(featureViewBuilder.build(featureView, resourceRequest, project, user, uriInfo)) .build(); } - private ResourceRequest makeResourceRequest(FeatureViewBeanParam param) { - ResourceRequest resourceRequest = new ResourceRequest(ResourceRequest.Name.FEATUREVIEW); - resourceRequest.setOffset(param.getPagination().getOffset()); - resourceRequest.setLimit(param.getPagination().getLimit()); - resourceRequest.setSort(param.getParsedSortBy()); - resourceRequest.setFilter(param.getFilters()); - resourceRequest.setExpansions(param.getExpansion().getResources()); - return resourceRequest; - } - @DELETE @Path("/{name: [a-z0-9_]*(?=[a-z])[a-z0-9_]+}") @AllowedProjectRoles({AllowedProjectRoles.DATA_OWNER, AllowedProjectRoles.DATA_SCIENTIST}) @@ -215,8 +223,7 @@ public Response deleteName( ) throws FeaturestoreException { Users user = jWTHelper.getUserPrincipal(sc); featureViewController.delete(user, project, featurestore, name); - activityFacade.persistActivity(ActivityFacade.DELETED_FEATURE_VIEW + name, - project, user, ActivityFlag.SERVICE); + return Response.ok().build(); } @@ -239,11 +246,40 @@ public Response deleteNameVersion( ) throws FeaturestoreException { Users user = jWTHelper.getUserPrincipal(sc); featureViewController.delete(user, project, featurestore, name, version); - activityFacade.persistActivity(ActivityFacade.DELETED_FEATURE_VIEW + name, - project, user, ActivityFlag.SERVICE); + return Response.ok().build(); } + @PUT + @Produces(MediaType.APPLICATION_JSON) + @Consumes(MediaType.APPLICATION_JSON) + @AllowedProjectRoles({AllowedProjectRoles.DATA_OWNER, AllowedProjectRoles.DATA_SCIENTIST}) + @JWTRequired(acceptedTokens = 
{Audience.API, Audience.JOB}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @ApiKeyRequired(acceptedScopes = {ApiScope.FEATURESTORE}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @ApiOperation(value = "Update Feature View metadata.") + public Response update( + @Context + SecurityContext sc, + @Context + HttpServletRequest req, + @Context + UriInfo uriInfo, + @BeanParam + FeatureViewBeanParam param, + FeatureViewDTO featureViewDTO) throws FeaturestoreException, ProvenanceException, ServiceException, IOException, + SchematizedTagException, MetadataException, DatasetException { + if (featureViewDTO == null) { + throw new IllegalArgumentException("Input JSON for updating a Feature View cannot be null"); + } + Users user = jWTHelper.getUserPrincipal(sc); + ResourceRequest resourceRequest = makeResourceRequest(param); + FeatureView featureView = featureViewController.update(user, project, featurestore, featureViewDTO); + + return Response.ok() + .entity(featureViewBuilder.build(featureView, resourceRequest, project, user, uriInfo)) + .build(); + } + public void setProject(Project project) { this.project = project; } @@ -251,4 +287,23 @@ public void setProject(Project project) { public void setFeaturestore(Featurestore featurestore) { this.featurestore = featurestore; } + + private ResourceRequest makeResourceRequest(FeatureViewBeanParam param) { + ResourceRequest resourceRequest = new ResourceRequest(ResourceRequest.Name.FEATUREVIEW); + resourceRequest.setOffset(param.getPagination().getOffset()); + resourceRequest.setLimit(param.getPagination().getLimit()); + resourceRequest.setSort(param.getParsedSortBy()); + resourceRequest.setFilter(param.getFilters()); + resourceRequest.setExpansions(param.getExpansion().getResources()); + return resourceRequest; + } + + private QueryParam convertToQueryParam(ResourceRequest resourceRequest) { + return new QueryParam( + resourceRequest.getOffset(), + resourceRequest.getLimit(), + resourceRequest.getFilter() == null ? 
new HashSet<>() : new HashSet<>(resourceRequest.getFilter()), + resourceRequest.getSort() == null ? new HashSet<>() : new HashSet<>(resourceRequest.getSort()) + ); + } } \ No newline at end of file diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewService.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewService.java index 19d3e73db9..d0f0705c7d 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewService.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewService.java @@ -54,24 +54,27 @@ public class FeatureViewService { @Inject private TransformationResource transformationResource; @Inject - private PreparedStatementResource prepareStatementResource; + private PreparedStatementResource preparedStatementResource; @Inject private FeaturestoreKeywordResource featurestoreKeywordResource; @EJB private FeaturestoreController featurestoreController; + @EJB + private FeatureViewController featureViewController; @Inject private ActivityResource activityResource; + private Project project; private Featurestore featurestore; @Path("") public FeatureViewResource featureViewResource() { - featureViewResource.setFeaturestore(featurestore); - featureViewResource.setProject(project); - return featureViewResource; + this.featureViewResource.setProject(project); + this.featureViewResource.setFeaturestore(featurestore); + return this.featureViewResource; } - @Path("/{name: [a-z0-9_]*(?=[a-z])[a-z0-9_]+}/version/{version: [0-9]+}/td") + @Path("/{name: [a-z0-9_]*(?=[a-z])[a-z0-9_]+}/version/{version: [0-9]+}/trainingdatasets") public TrainingDatasetResource trainingDatasetResource( @ApiParam(value = "Name of the feature view", required = true) @PathParam("name") @@ -79,26 +82,11 @@ public TrainingDatasetResource trainingDatasetResource( @ApiParam(value = "Version of the feature view", required = true) 
@PathParam("version") Integer version - ) { - trainingDatasetResource.setFeatureView(featureViewName, version); - return trainingDatasetResource; - } - - @Path("/{name: [a-z0-9_]*(?=[a-z])[a-z0-9_]+}/version/{version: [0-9]+}/tags") - public FeatureViewTagResource tags( - @ApiParam(value = "Name of the feature view", required = true) - @PathParam("name") - String featureViewName, - @ApiParam(value = "Version of the feature view", required = true) - @PathParam("version") - Integer version - ) { - - tagResource.setProject(project); - tagResource.setFeatureStore(featurestore); - //TODO get the actual feature view instance - tagResource.setFeatureView(null); - return tagResource; + ) throws FeaturestoreException { + this.trainingDatasetResource.setProject(project); + this.trainingDatasetResource.setFeaturestore(featurestore); + this.trainingDatasetResource.setFeatureView(featureViewName, version); + return this.trainingDatasetResource; } @Path("/{name: [a-z0-9_]*(?=[a-z])[a-z0-9_]+}/version/{version: [0-9]+}/query") @@ -109,9 +97,11 @@ public QueryResource query( @ApiParam(value = "Version of the feature view", required = true) @PathParam("version") Integer version - ) { - queryResource.setFeatureView(featureViewName, version); - return queryResource; + ) throws FeaturestoreException { + this.queryResource.setProject(project); + this.queryResource.setFeaturestore(featurestore); + this.queryResource.setFeatureView(featureViewName, version); + return this.queryResource; } @Path("/{name: [a-z0-9_]*(?=[a-z])[a-z0-9_]+}/version/{version: [0-9]+}/keywords") @@ -122,8 +112,11 @@ public FeaturestoreKeywordResource keywords( @ApiParam(value = "Version of the feature view", required = true) @PathParam("version") Integer version - ) { - return featurestoreKeywordResource; + ) throws FeaturestoreException { + this.featurestoreKeywordResource.setProject(project); + this.featurestoreKeywordResource.setFeaturestore(featurestore); + 
this.featurestoreKeywordResource.setFeatureView(featureViewName, version); + return this.featurestoreKeywordResource; } @Path("/{name: [a-z0-9_]*(?=[a-z])[a-z0-9_]+}/version/{version: [0-9]+}/activity") @@ -134,7 +127,10 @@ public ActivityResource activity( @ApiParam(value = "Version of the feature view", required = true) @PathParam("version") Integer version - ) { + ) throws FeaturestoreException { + this.activityResource.setProject(project); + this.activityResource.setFeaturestore(featurestore); + this.activityResource.setFeatureView(featureViewName, version); return this.activityResource; } @@ -146,11 +142,14 @@ public TransformationResource transformation( @ApiParam(value = "Version of the feature view", required = true) @PathParam("version") Integer version - ) { - return transformationResource; + ) throws FeaturestoreException { + this.transformationResource.setProject(project); + this.transformationResource.setFeaturestore(featurestore); + this.transformationResource.setFeatureView(featureViewName, version); + return this.transformationResource; } - @Path("/{name: [a-z0-9_]*(?=[a-z])[a-z0-9_]+}/version/{version: [0-9]+}/preparedStatement") + @Path("/{name: [a-z0-9_]*(?=[a-z])[a-z0-9_]+}/version/{version: [0-9]+}/preparedstatement") public PreparedStatementResource preparedStatement( @ApiParam(value = "Name of the feature view", required = true) @PathParam("name") @@ -158,8 +157,11 @@ public PreparedStatementResource preparedStatement( @ApiParam(value = "Version of the feature view", required = true) @PathParam("version") Integer version - ) { - return prepareStatementResource; + ) throws FeaturestoreException { + this.preparedStatementResource.setProject(project); + this.preparedStatementResource.setFeatureStore(featurestore); + this.preparedStatementResource.setFeatureView(featureViewName, version); + return this.preparedStatementResource; } public void setProject(Project project) { diff --git 
a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewTagResource.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewTagResource.java index b39c1f8e20..14685a99fb 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewTagResource.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/featureview/FeatureViewTagResource.java @@ -17,9 +17,14 @@ import io.hops.hopsworks.api.featurestore.tag.FeatureStoreTagResource; import io.hops.hopsworks.common.api.ResourceRequest; +import io.hops.hopsworks.common.dataset.util.DatasetHelper; import io.hops.hopsworks.common.dataset.util.DatasetPath; +import io.hops.hopsworks.exceptions.DatasetException; +import io.hops.hopsworks.exceptions.FeaturestoreException; +import io.hops.hopsworks.persistence.entity.dataset.DatasetType; import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; +import javax.ejb.EJB; import javax.ejb.TransactionAttribute; import javax.ejb.TransactionAttributeType; import javax.enterprise.context.RequestScoped; @@ -29,20 +34,25 @@ public class FeatureViewTagResource extends FeatureStoreTagResource { private FeatureView featureView; + @EJB + private DatasetHelper datasetHelper; + @EJB + private FeatureViewController featureViewController; /** * Sets the feature view of the tag resource * - * @param featureView + * @param name + * @param version */ - public void setFeatureView(FeatureView featureView) { - this.featureView = featureView; + public void setFeatureView(String name, Integer version) throws FeaturestoreException { + this.featureView = featureViewController.getByNameVersionAndFeatureStore(name, version, featureStore); } - + @Override - protected DatasetPath getDatasetPath() { - //TODO get reliable feature view path - return null; + protected DatasetPath getDatasetPath() throws DatasetException { + return datasetHelper.getDatasetPath(project, 
featureViewController.getLocation(featureView), + DatasetType.DATASET); } @Override diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/preparestatement/PreparedStatementResource.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/preparestatement/PreparedStatementResource.java index 56b95941d2..f935e627ef 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/preparestatement/PreparedStatementResource.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/preparestatement/PreparedStatementResource.java @@ -16,19 +16,21 @@ package io.hops.hopsworks.api.featurestore.preparestatement; +import io.hops.hopsworks.api.featurestore.featureview.FeatureViewController; import io.hops.hopsworks.api.featurestore.trainingdataset.PreparedStatementBuilder; import io.hops.hopsworks.api.filter.AllowedProjectRoles; import io.hops.hopsworks.api.filter.Audience; import io.hops.hopsworks.api.filter.apiKey.ApiKeyRequired; import io.hops.hopsworks.api.jwt.JWTHelper; +import io.hops.hopsworks.common.api.ResourceRequest; import io.hops.hopsworks.common.featurestore.query.ServingPreparedStatementDTO; import io.hops.hopsworks.exceptions.FeaturestoreException; import io.hops.hopsworks.jwt.annotation.JWTRequired; import io.hops.hopsworks.persistence.entity.featurestore.Featurestore; +import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; import io.hops.hopsworks.persistence.entity.project.Project; import io.hops.hopsworks.persistence.entity.user.Users; import io.hops.hopsworks.persistence.entity.user.security.apiKey.ApiScope; -import io.hops.hopsworks.restutils.RESTCodes; import io.swagger.annotations.ApiOperation; import io.swagger.annotations.ApiParam; @@ -54,12 +56,27 @@ public class PreparedStatementResource { @EJB private JWTHelper jWTHelper; @EJB + private FeatureViewController featureViewController; + @EJB private PreparedStatementBuilder preparedStatementBuilder; + private 
Project project; private Featurestore featurestore; - private Integer trainingDatasetId; + private FeatureView featureView; + + public void setProject(Project project) { + this.project = project; + } + + public void setFeatureStore(Featurestore featurestore) { + this.featurestore = featurestore; + } - @ApiOperation(value = "Get prepared statements used to generate model serving vector from training dataset query", + public void setFeatureView(String name, Integer version) throws FeaturestoreException { + featureView = featureViewController.getByNameVersionAndFeatureStore(name, version, featurestore); + } + + @ApiOperation(value = "Get prepared statements used to generate model serving vector from feature view query", response = ServingPreparedStatementDTO.class) @GET @Produces(MediaType.APPLICATION_JSON) @@ -78,22 +95,9 @@ public Response getPreparedStatements( @DefaultValue("false") boolean batch) throws FeaturestoreException { - verifyIdProvided(trainingDatasetId); Users user = jWTHelper.getUserPrincipal(sc); - - // refer to TrainingDatasetService.getPreparedStatements() for implementation detail - return Response.ok().build(); - } - - /** - * Verify that the user id was provided as a path param - * - * @param trainingDatasetId - * the training dataset id to verify - */ - private void verifyIdProvided(Integer trainingDatasetId) { - if (trainingDatasetId == null) { - throw new IllegalArgumentException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_ID_NOT_PROVIDED.getMessage()); - } + ServingPreparedStatementDTO servingPreparedStatementDTO = preparedStatementBuilder.build(uriInfo, + new ResourceRequest(ResourceRequest.Name.PREPAREDSTATEMENTS), project, user, featurestore, featureView, batch); + return Response.ok().entity(servingPreparedStatementDTO).build(); } } diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/query/QueryResource.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/query/QueryResource.java index 
f1bbdf07dc..6e3dac6fff 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/query/QueryResource.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/query/QueryResource.java @@ -16,57 +16,127 @@ package io.hops.hopsworks.api.featurestore.query; -import io.hops.hadoop.shaded.javax.ws.rs.core.Context; +import io.hops.hopsworks.api.featurestore.FsQueryBuilder; +import io.hops.hopsworks.api.featurestore.featureview.FeatureViewController; +import io.hops.hopsworks.api.filter.AllowedProjectRoles; import io.hops.hopsworks.api.filter.Audience; +import io.hops.hopsworks.api.filter.apiKey.ApiKeyRequired; +import io.hops.hopsworks.api.jwt.JWTHelper; +import io.hops.hopsworks.common.featurestore.query.FsQueryDTO; +import io.hops.hopsworks.common.featurestore.query.Query; +import io.hops.hopsworks.common.featurestore.query.QueryBuilder; +import io.hops.hopsworks.common.featurestore.query.QueryController; import io.hops.hopsworks.common.featurestore.query.QueryDTO; +import io.hops.hopsworks.exceptions.FeaturestoreException; +import io.hops.hopsworks.exceptions.ServiceException; import io.hops.hopsworks.jwt.annotation.JWTRequired; +import io.hops.hopsworks.persistence.entity.featurestore.Featurestore; +import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; +import io.hops.hopsworks.persistence.entity.project.Project; +import io.hops.hopsworks.persistence.entity.user.Users; +import io.hops.hopsworks.persistence.entity.user.security.apiKey.ApiScope; +import io.swagger.annotations.ApiOperation; import io.swagger.annotations.ApiParam; +import javax.ejb.EJB; import javax.ejb.TransactionAttribute; import javax.ejb.TransactionAttributeType; import javax.enterprise.context.RequestScoped; import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.DefaultValue; import javax.ws.rs.GET; import javax.ws.rs.Path; +import javax.ws.rs.Produces; import javax.ws.rs.QueryParam; +import javax.ws.rs.core.Context; +import 
javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; import javax.ws.rs.core.SecurityContext; +import javax.ws.rs.core.UriInfo; @RequestScoped @TransactionAttribute(TransactionAttributeType.NEVER) public class QueryResource { + @EJB + private FeatureViewController featureViewController; + @EJB + private QueryController queryController; + @EJB + private JWTHelper jWTHelper; + @EJB + private FsQueryBuilder fsQueryBuilder; + @EJB + private QueryBuilder queryBuilder; + + private Featurestore featurestore; + private FeatureView featureView; + private Project project; + + @ApiOperation(value = "Return batch query with given event time.", + response = FsQueryDTO.class) @GET @Path("/batch") + @Produces(MediaType.APPLICATION_JSON) + @AllowedProjectRoles({AllowedProjectRoles.DATA_OWNER, AllowedProjectRoles.DATA_SCIENTIST}) + @ApiKeyRequired(acceptedScopes = {ApiScope.FEATURESTORE}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) @JWTRequired(acceptedTokens = {Audience.API, Audience.JOB}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) public Response constructBatchQuery( @Context SecurityContext sc, @Context HttpServletRequest req, + @Context + UriInfo uriInfo, @ApiParam(value = "Event start time") - @QueryParam("startTime") - String startTime, + @QueryParam("start_time") + Long startTime, @ApiParam(value = "Event end time") - @QueryParam("endTime") - String endTime - ) { - // return an offline query string - return Response.ok().entity("").build(); + @QueryParam("end_time") + Long endTime, + @ApiParam(value = "Get query with label features") + @QueryParam("with_label") + @DefaultValue("false") + Boolean withLabel, + @ApiParam(value = "Get query in hive format") + @QueryParam("is_hive_engine") + @DefaultValue("false") + Boolean isHiveEngine + ) throws FeaturestoreException, ServiceException { + Users user = jWTHelper.getUserPrincipal(sc); + Query query = queryController.constructBatchQuery( + featureView, project, user, startTime, endTime, withLabel, isHiveEngine); 
+ return Response.ok().entity(queryBuilder.build(query, featurestore, project, user)).build(); } + @ApiOperation(value = "Return query originally used to create the feature view without event time filter.", + response = QueryDTO.class) @GET + @Produces(MediaType.APPLICATION_JSON) + @AllowedProjectRoles({AllowedProjectRoles.DATA_OWNER, AllowedProjectRoles.DATA_SCIENTIST}) + @ApiKeyRequired(acceptedScopes = {ApiScope.FEATURESTORE}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) @JWTRequired(acceptedTokens = {Audience.API, Audience.JOB}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) public Response getQuery( @Context SecurityContext sc, @Context HttpServletRequest req - ) { - // return query originally used to create the feature view without event time filter - return Response.ok().entity(new QueryDTO()).build(); + ) throws FeaturestoreException, ServiceException { + Users user = jWTHelper.getUserPrincipal(sc); + Query query = queryController.makeQuery(featureView, project, user, true, false); + QueryDTO queryDTO = queryBuilder.build(query, featurestore, project, user); + return Response.ok().entity(queryDTO).build(); + } + + public void setFeatureView(String name, Integer version) throws FeaturestoreException { + featureView = featureViewController.getByNameVersionAndFeatureStore(name, version, featurestore); + } + + public void setProject(Project project) { + this.project = project; } - public void setFeatureView(String name, Integer version) { + public void setFeaturestore(Featurestore featurestore) { + this.featurestore = featurestore; } } diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/statistics/StatisticsBuilder.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/statistics/StatisticsBuilder.java index a052f41cfc..97e9dd0f42 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/statistics/StatisticsBuilder.java +++ 
b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/statistics/StatisticsBuilder.java @@ -21,6 +21,7 @@ import io.hops.hopsworks.common.featurestore.statistics.FeaturestoreStatisticFacade; import io.hops.hopsworks.common.featurestore.statistics.StatisticsController; import io.hops.hopsworks.persistence.entity.featurestore.Featurestore; +import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; import io.hops.hopsworks.persistence.entity.featurestore.statistics.FeaturestoreStatistic; import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup; import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset; @@ -95,6 +96,21 @@ private URI uri(UriInfo uriInfo, Project project, Featurestore featurestore, .build(); } + private URI uri(UriInfo uriInfo, Project project, Featurestore featurestore, + FeatureView featureView, FeaturestoreStatistic statistics) { + return uri(uriInfo, project, featurestore) + .path(ResourceRequest.Name.FEATUREVIEW.toString().toLowerCase()) + .path(featureView.getName()) + .path(ResourceRequest.Name.VERSION.toString().toLowerCase()) + .path(Integer.toString(featureView.getVersion())) + .path(ResourceRequest.Name.TRAININGDATASETS.toString().toLowerCase()) + .path(Integer.toString(statistics.getTrainingDataset().getId())) + .path(ResourceRequest.Name.STATISTICS.toString().toLowerCase()) + .queryParam("filter_by", "commit_time_eq:" + statistics.getCommitTime()) + .queryParam("fields", "content") + .build(); + } + private boolean expand(ResourceRequest resourceRequest) { return resourceRequest != null && resourceRequest.contains(ResourceRequest.Name.STATISTICS); } @@ -148,6 +164,23 @@ public StatisticsDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, return dto; } + public StatisticsDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, + Project project, Users user, + FeatureView featureView, + FeaturestoreStatistic featurestoreStatistic) throws 
FeaturestoreException { + StatisticsDTO dto = new StatisticsDTO(); + dto.setHref(uri(uriInfo, project, featureView.getFeaturestore(), featureView, featurestoreStatistic)); + dto.setExpand(expand(resourceRequest)); + if (dto.isExpand()) { + dto.setCommitTime(featurestoreStatistic.getCommitTime().getTime()); + if (resourceRequest.getField() != null && resourceRequest.getField().contains("content")) { + dto.setContent(statisticsController.readStatisticsContent(project, user, featurestoreStatistic)); + } + } + + return dto; + } + public StatisticsDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, Project project, Users user, Featurestore featurestore, Featuregroup featuregroup) throws FeaturestoreException { diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/statistics/StatisticsResource.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/statistics/StatisticsResource.java index d6a05268ed..08b49151f9 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/statistics/StatisticsResource.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/statistics/StatisticsResource.java @@ -110,6 +110,10 @@ public void setTrainingDatasetId(Integer trainingDatasetId) throws FeaturestoreE this.trainingDataset = trainingDatasetController.getTrainingDatasetById(featurestore, trainingDatasetId); } + public void setTrainingDataset(TrainingDataset trainingDataset) { + this.trainingDataset = trainingDataset; + } + @GET @Produces(MediaType.APPLICATION_JSON) @ApiOperation(value = "Get all available statistics", response = StatisticsDTO.class) diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/PreparedStatementBuilder.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/PreparedStatementBuilder.java index 37ea932b88..1a84ded86f 100644 --- 
a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/PreparedStatementBuilder.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/PreparedStatementBuilder.java @@ -16,6 +16,7 @@ package io.hops.hopsworks.api.featurestore.trainingdataset; +import io.hops.hopsworks.api.featurestore.featureview.FeatureViewController; import io.hops.hopsworks.common.api.ResourceRequest; import io.hops.hopsworks.common.featurestore.FeaturestoreController; import io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO; @@ -29,6 +30,7 @@ import io.hops.hopsworks.common.featurestore.query.filter.Filter; import io.hops.hopsworks.common.featurestore.query.filter.FilterController; import io.hops.hopsworks.common.featurestore.query.filter.FilterLogic; +import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition; import io.hops.hopsworks.common.featurestore.trainingdatasets.TrainingDatasetController; import io.hops.hopsworks.exceptions.FeaturestoreException; @@ -51,6 +53,7 @@ import java.net.URI; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Comparator; import java.util.List; import java.util.Map; @@ -60,14 +63,16 @@ @Stateless @TransactionAttribute(TransactionAttributeType.NEVER) public class PreparedStatementBuilder { - + // this is used to overwrite feature type in prepared statement private final static String PREPARED_STATEMENT_TYPE = "parameter"; private final static String ALIAS = "fg0"; - + @EJB private TrainingDatasetController trainingDatasetController; @EJB + private FeatureViewController featureViewController; + @EJB private FeaturestoreController featurestoreController; @EJB private OnlineFeaturestoreController onlineFeaturestoreController; @@ -77,29 +82,41 @@ public class PreparedStatementBuilder { private FeaturegroupController 
featuregroupController; @EJB private FilterController filterController; - + private URI uri(UriInfo uriInfo, Project project, Featurestore featurestore, TrainingDataset trainingDataset) { return uriInfo.getBaseUriBuilder().path(ResourceRequest.Name.PROJECT.toString().toLowerCase()) - .path(Integer.toString(project.getId())) - .path(ResourceRequest.Name.FEATURESTORES.toString().toLowerCase()) - .path(Integer.toString(featurestore.getId())) - .path(ResourceRequest.Name.TRAININGDATASETS.toString().toLowerCase()) - .path(Integer.toString(trainingDataset.getId())) - .path(ResourceRequest.Name.PREPAREDSTATEMENTS.toString().toLowerCase()).build(); + .path(Integer.toString(project.getId())) + .path(ResourceRequest.Name.FEATURESTORES.toString().toLowerCase()) + .path(Integer.toString(featurestore.getId())) + .path(ResourceRequest.Name.TRAININGDATASETS.toString().toLowerCase()) + .path(Integer.toString(trainingDataset.getId())) + .path(ResourceRequest.Name.PREPAREDSTATEMENTS.toString().toLowerCase()).build(); } - + + private URI uri(UriInfo uriInfo, Project project, Featurestore featurestore, FeatureView featureView) { + return uriInfo.getBaseUriBuilder().path(ResourceRequest.Name.PROJECT.toString().toLowerCase()) + .path(Integer.toString(project.getId())) + .path(ResourceRequest.Name.FEATURESTORES.toString().toLowerCase()) + .path(Integer.toString(featurestore.getId())) + .path(ResourceRequest.Name.FEATUREVIEW.toString().toLowerCase()) + .path(featureView.getName()) + .path(ResourceRequest.Name.VERSION.toString().toLowerCase()) + .path(Integer.toString(featureView.getVersion())) + .path(ResourceRequest.Name.PREPAREDSTATEMENTS.toString().toLowerCase()).build(); + } + private boolean expand(ResourceRequest resourceRequest) { return resourceRequest != null && resourceRequest.contains(ResourceRequest.Name.PREPAREDSTATEMENTS); } - + public ServingPreparedStatementDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, Project project, - Users user, Featurestore featurestore, 
Integer trainingDatasetId, - boolean batch) throws FeaturestoreException { + Users user, Featurestore featurestore, Integer trainingDatasetId, + boolean batch) throws FeaturestoreException { TrainingDataset trainingDataset = trainingDatasetController.getTrainingDatasetById(featurestore, trainingDatasetId); - + List servingPreparedStatementDTOs = - getServingStatements(trainingDataset, project, user, batch); - + getServingStatements(trainingDataset, project, user, batch); + ServingPreparedStatementDTO servingPreparedStatementDTO = new ServingPreparedStatementDTO(); servingPreparedStatementDTO.setHref(uri(uriInfo, project, featurestore, trainingDataset)); servingPreparedStatementDTO.setExpand(expand(resourceRequest)); @@ -109,97 +126,138 @@ public ServingPreparedStatementDTO build(UriInfo uriInfo, ResourceRequest resour } return servingPreparedStatementDTO; } - + private List getServingStatements(TrainingDataset trainingDataset, Project project, - Users user, boolean batch) - throws FeaturestoreException { + Users user, boolean batch) + throws FeaturestoreException { if (!trainingDataset.isQuery()) { throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NO_QUERY, - Level.FINE, "Inference vector is only available for datasets generated by queries"); + Level.FINE, "Inference vector is only available for datasets generated by queries"); } - - List servingPreparedStatementDTOS = new ArrayList<>(); - + List joins = trainingDatasetController.getJoinsSorted(trainingDataset); // Check that all the feature groups still exists, if not throw a reasonable error if (trainingDataset.getFeatures().stream().anyMatch(j -> j.getFeatureGroup() == null)) { - throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_QUERY_FG_DELETED, Level.FINE); + throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.QUERY_FAILED_FG_DELETED, Level.FINE); + } + + List servingPreparedStatementDTOS = + createServingPreparedStatementDTOS(joins, 
project, user, batch); + + return servingPreparedStatementDTOS; + } + + public ServingPreparedStatementDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, Project project, + Users user, Featurestore featurestore, FeatureView featureView, + boolean batch) throws FeaturestoreException { + List servingPreparedStatementDTOs = + getServingStatements(featureView, project, user, batch); + + ServingPreparedStatementDTO servingPreparedStatementDTO = new ServingPreparedStatementDTO(); + servingPreparedStatementDTO.setHref(uri(uriInfo, project, featurestore, featureView)); + servingPreparedStatementDTO.setExpand(expand(resourceRequest)); + if (servingPreparedStatementDTO.isExpand()) { + servingPreparedStatementDTO.setItems(servingPreparedStatementDTOs); + servingPreparedStatementDTO.setCount((long) servingPreparedStatementDTOs.size()); + } + return servingPreparedStatementDTO; + } + + private List getServingStatements(FeatureView featureView, Project project, + Users user, boolean batch) + throws FeaturestoreException { + List joins = trainingDatasetController.getJoinsSorted(featureView.getJoins()); + // Check that all the feature groups still exists, if not throw a reasonable error + if (featureView.getFeatures().stream().anyMatch(j -> j.getFeatureGroup() == null)) { + throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.QUERY_FAILED_FG_DELETED, Level.FINE); } - + + List servingPreparedStatementDTOS = + createServingPreparedStatementDTOS(joins, project, user, batch); + + return servingPreparedStatementDTOS; + } + + private List createServingPreparedStatementDTOS( + Collection joins, Project project, Users user, boolean batch) + throws FeaturestoreException { + List servingPreparedStatementDTOS = new ArrayList<>(); + // each join is a feature group, iterate over them. 
for (TrainingDatasetJoin join : joins) { Featuregroup featuregroup = join.getFeatureGroup(); - + if ((featuregroup.getStreamFeatureGroup() != null && !featuregroup.getStreamFeatureGroup().isOnlineEnabled()) || (featuregroup.getCachedFeaturegroup() != null && !featuregroup.getCachedFeaturegroup().isOnlineEnabled())){ throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURESTORE_ONLINE_NOT_ENABLED, Level.FINE, "Inference vector is only available for training datasets generated by online enabled " + "feature groups. Feature group `" + featuregroup.getName() + "` is not online enabled."); } - + Map featureGroupFeatures = featuregroupController.getFeatures(featuregroup, project, user).stream() .collect(Collectors.toMap(FeatureGroupFeatureDTO::getName, - f -> new Feature(f.getName(), ALIAS, f.getType(), f.getPrimary(), f.getDefaultValue(), join.getPrefix()) + f -> new Feature(f.getName(), ALIAS, f.getType(), f.getPrimary(), f.getDefaultValue(), join.getPrefix(), + join.getFeatureGroup()) )); // Identify and create primary key features for this feature group. Primary key features may not be the part of // query that generated the training dataset. 
List primaryKeys = featureGroupFeatures.values().stream() - .filter(Feature::isPrimary) - .collect(Collectors.toList()); + .filter(Feature::isPrimary) + .collect(Collectors.toList()); if (primaryKeys.size() == 0 ) { throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.PRIMARY_KEY_REQUIRED, - Level.FINE, "Inference vector is only available for training datasets generated by feature groups with " + - "at least 1 primary key"); + Level.FINE, "Inference vector is only available for training datasets generated by feature groups with " + + "at least 1 primary key"); } - + // create td features List selectFeatures = join.getFeatures().stream() - .filter(tdf -> !tdf.isLabel()) - .sorted(Comparator.comparing(TrainingDatasetFeature::getIndex)) - .map(tdf -> featureGroupFeatures.get(tdf.getName())) - .collect(Collectors.toList()); - + .filter(tdf -> !tdf.isLabel()) + .sorted(Comparator.comparing(TrainingDatasetFeature::getIndex)) + .map(tdf -> featureGroupFeatures.get(tdf.getName())) + .collect(Collectors.toList()); + // In some cases only label(s) are used from a feature group. In this case they will not be // part of the prepared statement thus don't add to this query. 
if (selectFeatures.size() > 0){ // construct query for this feature group Query query = new Query( - featurestoreController.getOfflineFeaturestoreDbName(featuregroup.getFeaturestore().getProject()), - onlineFeaturestoreController.getOnlineFeaturestoreDbName(featuregroup.getFeaturestore().getProject()), - featuregroup, - ALIAS, - selectFeatures + featurestoreController.getOfflineFeaturestoreDbName(featuregroup.getFeaturestore().getProject()), + onlineFeaturestoreController.getOnlineFeaturestoreDbName(featuregroup.getFeaturestore().getProject()), + featuregroup, + ALIAS, + selectFeatures ); // construct ServingPreparedStatementDTO and add to the list servingPreparedStatementDTOS.add(buildDTO(query, primaryKeys, featuregroup.getId(), join.getIndex(), batch)); } } - + return servingPreparedStatementDTOS; } - + private ServingPreparedStatementDTO buildDTO(Query query, List primaryKeys, Integer featureGroupId, - Integer statementIndex, boolean batch) - throws FeaturestoreException { + Integer statementIndex, boolean batch) + throws FeaturestoreException { // create primary key prepared statement filters for the query List stmtParameters = new ArrayList<>(); - + // Change the type of PK to PREPARED_STATEMENT_TYPE. This will avoid having the query constructor // adding additional quotes around the ? sign primaryKeys.forEach(f -> f.setType(PREPARED_STATEMENT_TYPE)); - + // record pk position in the prepared statement - start from 1 as that's how // prepared statements work. 
int primaryKeyIndex = 1; - + // First condition doesn't have any "AND" // we are guaranteed there is at least one primary key, as no primary key situations are filtered above Feature pkFeature = primaryKeys.get(0); - + stmtParameters.add(new PreparedStatementParameterDTO(pkFeature.getName(), primaryKeyIndex++)); - + FilterLogic filterLogic; if (batch){ filterLogic = new FilterLogic(new Filter(primaryKeys, SqlCondition.IN, "?")); @@ -207,7 +265,7 @@ private ServingPreparedStatementDTO buildDTO(Query query, List primaryK } else { filterLogic = new FilterLogic(new Filter(Arrays.asList(pkFeature), SqlCondition.EQUALS, "?")); } - + // Concatenate conditions for (int i = 1; i < primaryKeys.size(); i++) { pkFeature = primaryKeys.get(i); @@ -215,14 +273,14 @@ private ServingPreparedStatementDTO buildDTO(Query query, List primaryK filterLogic = filterLogic.and(new Filter(Arrays.asList(pkFeature), SqlCondition.EQUALS, "?")); } stmtParameters.add( - new PreparedStatementParameterDTO(pkFeature.getName(), primaryKeyIndex++)); + new PreparedStatementParameterDTO(pkFeature.getName(), primaryKeyIndex++)); } - + query.setFilter(filterLogic); - + // set prepared statement parameters return new ServingPreparedStatementDTO(featureGroupId, statementIndex, stmtParameters, - constructorController.generateSQL(query, true) - .toSqlString(new MysqlSqlDialect(SqlDialect.EMPTY_CONTEXT)).getSql()); + constructorController.generateSQL(query, true) + .toSqlString(new MysqlSqlDialect(SqlDialect.EMPTY_CONTEXT)).getSql()); } } \ No newline at end of file diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/TrainingDatasetResource.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/TrainingDatasetResource.java index 987261a971..f7fc531db9 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/TrainingDatasetResource.java +++ 
b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/TrainingDatasetResource.java @@ -15,26 +15,63 @@ */ package io.hops.hopsworks.api.featurestore.trainingdataset; +import io.hops.hopsworks.api.featurestore.featureview.FeatureViewController; import io.hops.hopsworks.api.featurestore.statistics.StatisticsResource; +import io.hops.hopsworks.api.filter.AllowedProjectRoles; import io.hops.hopsworks.api.filter.Audience; +import io.hops.hopsworks.api.filter.apiKey.ApiKeyRequired; +import io.hops.hopsworks.api.jobs.JobDTO; +import io.hops.hopsworks.api.jobs.JobsBuilder; +import io.hops.hopsworks.api.jwt.JWTHelper; +import io.hops.hopsworks.common.api.ResourceRequest; +import io.hops.hopsworks.common.featurestore.OptionDTO; +import io.hops.hopsworks.common.featurestore.app.FsJobManagerController; +import io.hops.hopsworks.common.featurestore.trainingdatasets.TrainingDatasetController; import io.hops.hopsworks.common.featurestore.trainingdatasets.TrainingDatasetDTO; +import io.hops.hopsworks.common.featurestore.trainingdatasets.TrainingDatasetDTOBuilder; import io.hops.hopsworks.exceptions.FeaturestoreException; +import io.hops.hopsworks.exceptions.GenericException; +import io.hops.hopsworks.exceptions.JobException; +import io.hops.hopsworks.exceptions.ProjectException; +import io.hops.hopsworks.exceptions.ProvenanceException; +import io.hops.hopsworks.exceptions.ServiceException; import io.hops.hopsworks.jwt.annotation.JWTRequired; +import io.hops.hopsworks.persistence.entity.featurestore.Featurestore; +import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; +import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset; +import io.hops.hopsworks.persistence.entity.jobs.description.Jobs; +import io.hops.hopsworks.persistence.entity.project.Project; +import io.hops.hopsworks.persistence.entity.user.Users; +import io.hops.hopsworks.persistence.entity.user.security.apiKey.ApiScope; +import 
io.swagger.annotations.ApiOperation; import io.swagger.annotations.ApiParam; +import javax.ejb.EJB; import javax.ejb.TransactionAttribute; import javax.ejb.TransactionAttributeType; import javax.enterprise.context.RequestScoped; import javax.inject.Inject; import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.Consumes; import javax.ws.rs.DELETE; +import javax.ws.rs.DefaultValue; import javax.ws.rs.GET; import javax.ws.rs.POST; +import javax.ws.rs.PUT; import javax.ws.rs.Path; import javax.ws.rs.PathParam; +import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; import javax.ws.rs.core.Context; +import javax.ws.rs.core.GenericEntity; +import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; import javax.ws.rs.core.SecurityContext; +import javax.ws.rs.core.UriInfo; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; @RequestScoped @TransactionAttribute(TransactionAttributeType.NEVER) @@ -42,53 +79,119 @@ public class TrainingDatasetResource { @Inject private StatisticsResource statisticsResource; + @EJB + private FeatureViewController featureViewController; + @EJB + private JWTHelper jWTHelper; + @EJB + private FsJobManagerController fsJobManagerController; + @EJB + private JobsBuilder jobsBuilder; + @EJB + private TrainingDatasetController trainingDatasetController; + @EJB + private TrainingDatasetDTOBuilder trainingDatasetDTOBuilder; + + private Featurestore featurestore; + private FeatureView featureView; + private Project project; @POST + @Produces(MediaType.APPLICATION_JSON) + @Consumes(MediaType.APPLICATION_JSON) + @AllowedProjectRoles({AllowedProjectRoles.DATA_OWNER, AllowedProjectRoles.DATA_SCIENTIST}) @JWTRequired(acceptedTokens = {Audience.API, Audience.JOB}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @ApiKeyRequired(acceptedScopes = {ApiScope.FEATURESTORE}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @ApiOperation(value = "Create a training dataset.", 
response = TrainingDatasetDTO.class) public Response create( @Context SecurityContext sc, @Context HttpServletRequest req, + @Context + UriInfo uriInfo, TrainingDatasetDTO trainingDatasetDTO) - throws FeaturestoreException { - // This endpoint is used by both crateTrainingDataset and reproduceTrainingDataset. - // It should check if dataset of a given version has existed before launching a job. - return Response.ok().entity(new TrainingDatasetDTO()).build(); + throws FeaturestoreException, ProvenanceException, IOException, ServiceException { + Users user = jWTHelper.getUserPrincipal(sc); + TrainingDatasetDTO createdTrainingDatasetDTO = + trainingDatasetController.createTrainingDataset(user, project, featurestore, featureView, trainingDatasetDTO); + return Response.created(uriInfo.getRequestUri()).entity(createdTrainingDatasetDTO).build(); } @GET - @JWTRequired(acceptedTokens = {Audience.API, Audience.JOB}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @Produces(MediaType.APPLICATION_JSON) + @AllowedProjectRoles({AllowedProjectRoles.DATA_OWNER, AllowedProjectRoles.DATA_SCIENTIST}) + @JWTRequired(acceptedTokens = {Audience.API}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @ApiKeyRequired(acceptedScopes = {ApiScope.FEATURESTORE}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @ApiOperation(value = "Get the list of training datasets.", + response = TrainingDatasetDTO.class, responseContainer = "List") public Response getAll( @Context SecurityContext sc, + @Context + UriInfo uriInfo, @Context HttpServletRequest req - ) { - return Response.ok().entity(new TrainingDatasetDTO()).build(); - + ) throws FeaturestoreException, ServiceException { + Users user = jWTHelper.getUserPrincipal(sc); + List trainingDatasets = + trainingDatasetController.getTrainingDatasetByFeatureView(featureView); + TrainingDatasetDTO trainingDatasetDTO = trainingDatasetDTOBuilder.build(user, project, trainingDatasets, + uriInfo); + return Response.ok().entity(trainingDatasetDTO).build(); 
} @GET - @Path("/{version: [0-9]+}") + @Path("/version/{version: [0-9]+}") + @Produces(MediaType.APPLICATION_JSON) + @AllowedProjectRoles({AllowedProjectRoles.DATA_OWNER, AllowedProjectRoles.DATA_SCIENTIST}) @JWTRequired(acceptedTokens = {Audience.API, Audience.JOB}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @ApiKeyRequired(acceptedScopes = {ApiScope.FEATURESTORE}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @ApiOperation(value = "Get a training datasets with a specific name and version.", response = List.class) public Response getByVersion( @Context SecurityContext sc, @Context HttpServletRequest req, + @Context + UriInfo uriInfo, @ApiParam(value = "training dataset version") @PathParam("version") Integer version - ) { - return Response.ok().entity(new TrainingDatasetDTO()).build(); + ) throws FeaturestoreException, ServiceException { + Users user = jWTHelper.getUserPrincipal(sc); + TrainingDataset trainingDataset = trainingDatasetController.getTrainingDatasetByFeatureViewAndVersion(featureView, + version); + TrainingDatasetDTO trainingDatasetDTO = trainingDatasetDTOBuilder.build(user, project, trainingDataset, uriInfo); + return Response.ok().entity(trainingDatasetDTO).build(); } @DELETE - @Path("/{version: [0-9]+}") + @Produces(MediaType.APPLICATION_JSON) + @AllowedProjectRoles({AllowedProjectRoles.DATA_OWNER, AllowedProjectRoles.DATA_SCIENTIST}) @JWTRequired(acceptedTokens = {Audience.API, Audience.JOB}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @ApiKeyRequired(acceptedScopes = {ApiScope.FEATURESTORE}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @ApiOperation(value = "Delete all training datasets.", response = TrainingDatasetDTO.class) public Response delete( + @Context + SecurityContext sc, + @Context + HttpServletRequest req + ) throws FeaturestoreException { + Users user = jWTHelper.getUserPrincipal(sc); + trainingDatasetController.delete(user, project, featurestore, featureView); + return Response.ok().build(); + } + + @DELETE + 
@Path("/version/{version: [0-9]+}") + @Produces(MediaType.APPLICATION_JSON) + @AllowedProjectRoles({AllowedProjectRoles.DATA_OWNER, AllowedProjectRoles.DATA_SCIENTIST}) + @JWTRequired(acceptedTokens = {Audience.API, Audience.JOB}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @ApiKeyRequired(acceptedScopes = {ApiScope.FEATURESTORE}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @ApiOperation(value = "Delete a specific version of training dataset .", response = TrainingDatasetDTO.class) + public Response deleteByVersion( @Context SecurityContext sc, @Context @@ -96,16 +199,98 @@ public Response delete( @ApiParam(value = "training dataset version") @PathParam("version") Integer version - ) { - return Response.ok().entity(new TrainingDatasetDTO()).build(); + ) throws FeaturestoreException { + Users user = jWTHelper.getUserPrincipal(sc); + trainingDatasetController.delete(user, project, featurestore, featureView, version); + return Response.ok().build(); + } + @DELETE + @Path("/data") + @Produces(MediaType.APPLICATION_JSON) + @AllowedProjectRoles({AllowedProjectRoles.DATA_OWNER, AllowedProjectRoles.DATA_SCIENTIST}) + @JWTRequired(acceptedTokens = {Audience.API, Audience.JOB}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @ApiKeyRequired(acceptedScopes = {ApiScope.FEATURESTORE}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @ApiOperation(value = "Delete all data only of training datasets.", response = TrainingDatasetDTO.class) + public Response deleteDataOnly( + @Context + SecurityContext sc, + @Context + HttpServletRequest req + ) throws FeaturestoreException { + Users user = jWTHelper.getUserPrincipal(sc); + trainingDatasetController.deleteDataOnly(user, project, featurestore, featureView); + return Response.ok().build(); } - //Pagination, StatisticsBeanParam - @GET - @Path("/{version: [0-9]+}/statistics") + @DELETE + @Path("/version/{version: [0-9]+}/data") + @JWTRequired(acceptedTokens = {Audience.API, Audience.JOB}, allowedUserRoles = {"HOPS_ADMIN", 
"HOPS_USER"}) + @ApiKeyRequired(acceptedScopes = {ApiScope.FEATURESTORE}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @ApiOperation(value = "Delete data only of a specific version of training dataset.", + response = TrainingDatasetDTO.class) + public Response deleteDataOnlyByVersion( + @Context + SecurityContext sc, + @Context + HttpServletRequest req, + @ApiParam(value = "training dataset version") + @PathParam("version") + Integer version + ) throws FeaturestoreException { + Users user = jWTHelper.getUserPrincipal(sc); + trainingDatasetController.deleteDataOnly(user, project, featurestore, featureView, version); + return Response.ok().build(); + } + + @PUT + @Path("/version/{version: [0-9]+}") + @Produces(MediaType.APPLICATION_JSON) + @Consumes(MediaType.APPLICATION_JSON) + @AllowedProjectRoles({AllowedProjectRoles.DATA_OWNER, AllowedProjectRoles.DATA_SCIENTIST}) + @JWTRequired(acceptedTokens = {Audience.API, Audience.JOB}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @ApiKeyRequired(acceptedScopes = {ApiScope.FEATURESTORE}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @ApiOperation(value = "Update a training dataset.", response = TrainingDatasetDTO.class) + public Response updateTrainingDataset(@Context SecurityContext sc, + @Context HttpServletRequest req, + @ApiParam(value = "updateMetadata", example = "true") + @QueryParam("updateMetadata") @DefaultValue("false") + Boolean updateMetadata, + @ApiParam(value = "updateStatsConfig", example = "true") + @QueryParam("updateStatsConfig") @DefaultValue("false") + Boolean updateStatsConfig, + @PathParam("version") + Integer version, + TrainingDatasetDTO trainingDatasetDTO) + throws FeaturestoreException, ServiceException { + if (trainingDatasetDTO == null) { + throw new IllegalArgumentException("Input JSON for updating a Training Dataset cannot be null"); + } + + Users user = jWTHelper.getUserPrincipal(sc); + trainingDatasetDTO.setVersion(version); + TrainingDataset trainingDataset = 
trainingDatasetController.getTrainingDatasetByFeatureViewAndVersion(featureView, + version); + trainingDatasetDTO.setId(trainingDataset.getId()); + TrainingDatasetDTO oldTrainingDatasetDTO = trainingDatasetController.convertTrainingDatasetToDTO(user, project, + trainingDataset); + + if (updateMetadata) { + oldTrainingDatasetDTO = + trainingDatasetController.updateTrainingDatasetMetadata(user, project, featurestore, trainingDatasetDTO); + } + if (updateStatsConfig) { + oldTrainingDatasetDTO = + trainingDatasetController.updateTrainingDatasetStatsConfig(user, project, featurestore, trainingDatasetDTO); + } + GenericEntity trainingDatasetDTOGenericEntity = + new GenericEntity(oldTrainingDatasetDTO) {}; + return Response.ok().entity(trainingDatasetDTOGenericEntity).build(); + } + + @Path("/version/{version: [0-9]+}/statistics") @JWTRequired(acceptedTokens = {Audience.API, Audience.JOB}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) - public StatisticsResource getStatistics( + public StatisticsResource statistics( @Context SecurityContext sc, @Context @@ -113,11 +298,54 @@ public StatisticsResource getStatistics( @ApiParam(value = "training dataset version") @PathParam("version") Integer version - ) { + ) throws FeaturestoreException { + statisticsResource.setProject(project); + statisticsResource.setFeaturestore(featurestore); + TrainingDataset trainingDataset = trainingDatasetController.getTrainingDatasetByFeatureViewAndVersion( + featureView, version); + statisticsResource.setTrainingDataset(trainingDataset); return statisticsResource; } - public void setFeatureView(String name, Integer version) { + @POST + @Path("/version/{version: [0-9]+}/compute") + @Consumes(MediaType.APPLICATION_JSON) + @Produces(MediaType.APPLICATION_JSON) + @ApiOperation(value = "Setup a job to compute and write a training dataset", response = JobDTO.class) + @AllowedProjectRoles({AllowedProjectRoles.DATA_OWNER, AllowedProjectRoles.DATA_SCIENTIST}) + @JWTRequired(acceptedTokens = 
{Audience.API, Audience.JOB}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @ApiKeyRequired(acceptedScopes = {ApiScope.DATASET_VIEW, ApiScope.FEATURESTORE}, + allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + public Response compute(@Context UriInfo uriInfo, + @Context HttpServletRequest req, + @Context SecurityContext sc, + @PathParam("version") Integer trainingDatasetVersion, + TrainingDatasetJobConf trainingDatasetJobConf) + throws FeaturestoreException, ServiceException, JobException, ProjectException, GenericException { + Users user = jWTHelper.getUserPrincipal(sc); + + Map writeOptions = null; + if (trainingDatasetJobConf.getWriteOptions() != null) { + writeOptions = trainingDatasetJobConf.getWriteOptions() + .stream().collect(Collectors.toMap(OptionDTO::getName, OptionDTO::getValue)); + } + + Jobs job = fsJobManagerController.setupTrainingDatasetJob(project, user, featureView, trainingDatasetVersion, + trainingDatasetJobConf.getOverwrite(), writeOptions, trainingDatasetJobConf.getSparkJobConfiguration()); + JobDTO jobDTO = jobsBuilder.build(uriInfo, new ResourceRequest(ResourceRequest.Name.JOBS), job); + + return Response.created(jobDTO.getHref()).entity(jobDTO).build(); + } + + public void setFeatureView(String name, Integer version) throws FeaturestoreException { + featureView = featureViewController.getByNameVersionAndFeatureStore(name, version, featurestore); + } + public void setProject(Project project) { + this.project = project; + } + public void setFeaturestore(Featurestore featurestore) { + this.featurestore = featurestore; } + } diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/TrainingDatasetService.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/TrainingDatasetService.java index 255ada0714..072daf2f7f 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/TrainingDatasetService.java +++ 
b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/trainingdataset/TrainingDatasetService.java @@ -208,8 +208,6 @@ public Response create(@Context SecurityContext sc, TrainingDatasetDTO trainingD Users user = jWTHelper.getUserPrincipal(sc); TrainingDatasetDTO createdTrainingDatasetDTO = trainingDatasetController.createTrainingDataset(user, project, featurestore, trainingDatasetDTO); - activityFacade.persistActivity(ActivityFacade.CREATED_TRAINING_DATASET + - createdTrainingDatasetDTO.getName(), project, user, ActivityFlag.SERVICE); GenericEntity createdTrainingDatasetDTOGeneric = new GenericEntity(createdTrainingDatasetDTO) {}; return noCacheResponse.getNoCacheResponseBuilder(Response.Status.CREATED).entity(createdTrainingDatasetDTOGeneric) @@ -354,8 +352,6 @@ public Response updateTrainingDataset(@Context SecurityContext sc, if(updateMetadata){ oldTrainingDatasetDTO = trainingDatasetController.updateTrainingDatasetMetadata(user, project, featurestore, trainingDatasetDTO); - activityFacade.persistActivity(ActivityFacade.EDITED_TRAINING_DATASET + trainingDatasetDTO.getName(), - project, user, ActivityFlag.SERVICE); } if (updateStatsConfig) { oldTrainingDatasetDTO = diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/transformation/TransformationResource.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/transformation/TransformationResource.java index 915d366842..fbb8a98384 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/transformation/TransformationResource.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/transformation/TransformationResource.java @@ -16,9 +16,11 @@ package io.hops.hopsworks.api.featurestore.transformation; -import io.hops.hopsworks.api.featurestore.statistics.StatisticsResource; +import io.hops.hopsworks.api.featurestore.featureview.FeatureViewController; import 
io.hops.hopsworks.api.featurestore.transformationFunction.TransformationFunctionBuilder; +import io.hops.hopsworks.api.filter.AllowedProjectRoles; import io.hops.hopsworks.api.filter.Audience; +import io.hops.hopsworks.api.filter.apiKey.ApiKeyRequired; import io.hops.hopsworks.api.jwt.JWTHelper; import io.hops.hopsworks.common.api.ResourceRequest; import io.hops.hopsworks.common.featurestore.trainingdatasets.TrainingDatasetController; @@ -26,23 +28,19 @@ import io.hops.hopsworks.exceptions.FeaturestoreException; import io.hops.hopsworks.jwt.annotation.JWTRequired; import io.hops.hopsworks.persistence.entity.featurestore.Featurestore; -import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset; +import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; import io.hops.hopsworks.persistence.entity.project.Project; import io.hops.hopsworks.persistence.entity.user.Users; -import io.hops.hopsworks.restutils.RESTCodes; +import io.hops.hopsworks.persistence.entity.user.security.apiKey.ApiScope; import io.swagger.annotations.Api; import io.swagger.annotations.ApiOperation; -import io.swagger.annotations.ApiParam; import javax.ejb.EJB; import javax.ejb.TransactionAttribute; import javax.ejb.TransactionAttributeType; import javax.enterprise.context.RequestScoped; -import javax.inject.Inject; import javax.servlet.http.HttpServletRequest; import javax.ws.rs.GET; -import javax.ws.rs.Path; -import javax.ws.rs.PathParam; import javax.ws.rs.core.Context; import javax.ws.rs.core.Response; import javax.ws.rs.core.SecurityContext; @@ -53,22 +51,23 @@ @Api(value = "Transformation Resource") public class TransformationResource { + @EJB + private FeatureViewController featureViewController; @EJB private TrainingDatasetController trainingDatasetController; @EJB private TransformationFunctionBuilder transformationFunctionBuilder; @EJB private JWTHelper jWTHelper; - @Inject - private StatisticsResource statisticsResource; + private 
Project project; private Featurestore featurestore; - private Integer trainingDatasetId; + private FeatureView featureView; - // Copy from traindatasetService @GET - @Path("/functions") + @AllowedProjectRoles({AllowedProjectRoles.DATA_OWNER, AllowedProjectRoles.DATA_SCIENTIST}) @JWTRequired(acceptedTokens = {Audience.API, Audience.JOB}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) + @ApiKeyRequired(acceptedScopes = {ApiScope.FEATURESTORE}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) @ApiOperation(value = "Get Transformation functions.", response = TransformationFunctionAttachedDTO.class) public Response getTransformationFunction( @Context @@ -79,32 +78,23 @@ public Response getTransformationFunction( UriInfo uriInfo ) throws FeaturestoreException { - if (trainingDatasetId == null) { - throw new IllegalArgumentException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_ID_NOT_PROVIDED.getMessage()); - } - TrainingDataset trainingDataset = trainingDatasetController.getTrainingDatasetById(featurestore, trainingDatasetId); Users user = jWTHelper.getUserPrincipal(sc); ResourceRequest resourceRequest = new ResourceRequest(ResourceRequest.Name.TRANSFORMATIONFUNCTIONS); TransformationFunctionAttachedDTO transformationFunctionAttachedDTO = - transformationFunctionBuilder.build(uriInfo, resourceRequest, user, project, trainingDataset); + transformationFunctionBuilder.build(uriInfo, resourceRequest, user, project, featureView); return Response.ok().entity(transformationFunctionAttachedDTO).build(); } - @Path("/statistics") - @JWTRequired(acceptedTokens = {Audience.API, Audience.JOB}, allowedUserRoles = {"HOPS_ADMIN", "HOPS_USER"}) - public StatisticsResource statistics( - @Context - SecurityContext sc, - @Context - HttpServletRequest req, - @ApiParam(value = "Name of the feature view", required = true) - @PathParam("name") - String featureViewName, - @ApiParam(value = "Version of the feature view", required = true) - @PathParam("version") - Integer version - ) { - 
return statisticsResource; + public void setFeatureView(String name, Integer version) throws FeaturestoreException { + featureView = featureViewController.getByNameVersionAndFeatureStore(name, version, featurestore); + } + + public void setProject(Project project) { + this.project = project; + } + + public void setFeaturestore(Featurestore featurestore) { + this.featurestore = featurestore; } } diff --git a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/transformationFunction/TransformationFunctionBuilder.java b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/transformationFunction/TransformationFunctionBuilder.java index 94bc41e212..6e0eba2342 100644 --- a/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/transformationFunction/TransformationFunctionBuilder.java +++ b/hopsworks-api/src/main/java/io/hops/hopsworks/api/featurestore/transformationFunction/TransformationFunctionBuilder.java @@ -25,6 +25,7 @@ import io.hops.hopsworks.common.featurestore.transformationFunction.TransformationFunctionFacade; import io.hops.hopsworks.exceptions.FeaturestoreException; import io.hops.hopsworks.persistence.entity.featurestore.Featurestore; +import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset; import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature; import io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction; @@ -71,6 +72,19 @@ private URI uri(UriInfo uriInfo, Project project, Featurestore featurestore, Tra .path(ResourceRequest.Name.TRANSFORMATIONFUNCTIONS.toString().toLowerCase()).build(); } + private URI uri(UriInfo uriInfo, Project project, FeatureView featureView) { + return uriInfo.getBaseUriBuilder().path(ResourceRequest.Name.PROJECT.toString().toLowerCase()) + .path(Integer.toString(project.getId())) + 
.path(ResourceRequest.Name.FEATURESTORES.toString().toLowerCase()) + .path(Integer.toString(featureView.getFeaturestore().getId())) + .path(ResourceRequest.Name.FEATUREVIEW.toString().toLowerCase()) + .path(featureView.getName()) + .path(ResourceRequest.Name.VERSION.toString().toLowerCase()) + .path(String.valueOf(featureView.getVersion())) + .path(ResourceRequest.Name.TRANSFORMATION.toString().toLowerCase()) + .build(); + } + private boolean expand(ResourceRequest resourceRequest) { return resourceRequest != null && resourceRequest.contains(ResourceRequest.Name.TRANSFORMATIONFUNCTIONS); } @@ -143,10 +157,22 @@ public TransformationFunctionDTO build(UriInfo uriInfo, ResourceRequest resource return transformationFunctionDTO; } - public TransformationFunctionAttachedDTO build (UriInfo uriInfo, ResourceRequest resourceRequest, Users user, + public TransformationFunctionAttachedDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, Users user, Project project, TrainingDataset trainingDataset, TrainingDatasetFeature tdFeature) throws FeaturestoreException { + return build(resourceRequest, user, project, + uri(uriInfo, project, trainingDataset.getFeaturestore(), trainingDataset), tdFeature); + } + + public TransformationFunctionAttachedDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, Users user, + Project project, FeatureView featureView, + TrainingDatasetFeature tdFeature) throws FeaturestoreException { + return build(resourceRequest, user, project, + uri(uriInfo, project, featureView.getFeaturestore()), tdFeature); + } + private TransformationFunctionAttachedDTO build(ResourceRequest resourceRequest, Users user, + Project project, URI uri, TrainingDatasetFeature tdFeature) throws FeaturestoreException { TransformationFunctionAttachedDTO transformationFunctionAttachedDTO = new TransformationFunctionAttachedDTO(); TransformationFunctionDTO transformationFunctionDTO = new TransformationFunctionDTO( tdFeature.getTransformationFunction().getId(), @@ -156,8 
+182,7 @@ public TransformationFunctionAttachedDTO build (UriInfo uriInfo, ResourceRequest transformationFunctionController.readContent(user, project, tdFeature.getTransformationFunction()), tdFeature.getTransformationFunction().getFeaturestore().getId()); - transformationFunctionAttachedDTO.setHref(uri(uriInfo, project, trainingDataset.getFeaturestore(), - trainingDataset)); + transformationFunctionAttachedDTO.setHref(uri); transformationFunctionAttachedDTO.setExpand(expand(resourceRequest)); if (transformationFunctionAttachedDTO.isExpand()) { transformationFunctionAttachedDTO.setName(trainingDatasetController.checkPrefix(tdFeature)); @@ -167,7 +192,7 @@ public TransformationFunctionAttachedDTO build (UriInfo uriInfo, ResourceRequest return transformationFunctionAttachedDTO; } - public TransformationFunctionAttachedDTO build (UriInfo uriInfo, ResourceRequest resourceRequest, Users user, + public TransformationFunctionAttachedDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, Users user, Project project, TrainingDataset trainingDataset) throws FeaturestoreException { @@ -190,4 +215,27 @@ public TransformationFunctionAttachedDTO build (UriInfo uriInfo, ResourceRequest return transformationFunctionAttachedDTO; } + + public TransformationFunctionAttachedDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, Users user, + Project project, FeatureView featureView) + throws FeaturestoreException { + + TransformationFunctionAttachedDTO transformationFunctionAttachedDTO = new TransformationFunctionAttachedDTO(); + transformationFunctionAttachedDTO.setHref(uri(uriInfo, project, featureView)); + transformationFunctionAttachedDTO.setExpand(expand(resourceRequest)); + if (transformationFunctionAttachedDTO.isExpand()) { + List list = new ArrayList<>(); + for (TrainingDatasetFeature tdFeature: featureView.getFeatures()){ + if (tdFeature.getTransformationFunction() != null){ + TransformationFunctionAttachedDTO build = build(uriInfo, resourceRequest, user, project, 
featureView, + tdFeature); + list.add(build); + } + } + transformationFunctionAttachedDTO.setItems(list); + transformationFunctionAttachedDTO.setCount((long)list.size()); + } + + return transformationFunctionAttachedDTO; + } } diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/api/ResourceRequest.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/api/ResourceRequest.java index 8f871ed113..2c068c6bc4 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/api/ResourceRequest.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/api/ResourceRequest.java @@ -188,6 +188,7 @@ public enum Name { MODELS, HOSTS, QUERY, + QUERY_STRING, TAG_SCHEMAS, TAGS, SERVICES, @@ -226,7 +227,9 @@ public enum Name { ADMIN, EXECUTION, FEATURES, - FEATUREVIEW; + FEATUREVIEW, + VERSION, + TRANSFORMATION; public static Name fromString(String name) { return valueOf(name.toUpperCase()); diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/dao/user/activity/ActivityFacade.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/dao/user/activity/ActivityFacade.java index 967f201c43..49a4e07bc0 100755 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/dao/user/activity/ActivityFacade.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/dao/user/activity/ActivityFacade.java @@ -84,7 +84,8 @@ public class ActivityFacade extends AbstractFacade { public static final String CREATED_FEATUREGROUP = " created a new feature group named "; public static final String CREATED_TRAINING_DATASET = " created a new training dataset named "; public static final String DELETED_FEATUREGROUP = " deleted a feature group named "; - public static final String DELETED_TRAINING_DATASET = " deleted a training dataset named "; + public static final String DELETED_TRAINING_DATASET = " deleted a training dataset(data and metadata) named "; + public static final String DELETED_TRAINING_DATASET_DATA_ONLY = " deleted a 
training dataset(data only) named "; public static final String CREATED_NEW_VERSION_OF_FEATUREGROUP = " created a new version of a feature group named "; public static final String EDITED_FEATUREGROUP = " edited feature group named "; public static final String EDITED_TRAINING_DATASET = " edited training dataset named "; @@ -98,6 +99,7 @@ public class ActivityFacade extends AbstractFacade { "the featurestore with id: "; public static final String CREATED_FEATURE_VIEW = " created a new feature view "; public static final String DELETED_FEATURE_VIEW = " deleted a feature view "; + public static final String EDITED_FEATURE_VIEW = " edited a feature view "; @PersistenceContext(unitName = "kthfsPU") private EntityManager em; diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/FeaturestoreController.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/FeaturestoreController.java index 9a0ec942d4..4a77f0ad0d 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/FeaturestoreController.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/FeaturestoreController.java @@ -67,7 +67,7 @@ @Stateless @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED) public class FeaturestoreController { - + @EJB private FeaturestoreFacade featurestoreFacade; @EJB @@ -118,7 +118,7 @@ public List getFeaturestoresForProject(Project project) throws throw ex; } } - + /** * Return the feature store dataset for the specific project. not the shared ones. * @param project @@ -131,7 +131,7 @@ public Dataset getProjectFeaturestoreDataset(Project project) throws Featurestor .orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURESTORE_NOT_FOUND, Level.INFO, "Could not find feature store for project: " + project.getName())); } - + /** * Return the feature store for the specific project. not the shared ones. 
* @param project diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/FeaturestoreEntityDTO.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/FeaturestoreEntityDTO.java index b3f914d4b6..32373d0676 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/FeaturestoreEntityDTO.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/FeaturestoreEntityDTO.java @@ -19,6 +19,7 @@ import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonSubTypes; import com.fasterxml.jackson.annotation.JsonTypeInfo; +import io.hops.hopsworks.common.api.RestDTO; import io.hops.hopsworks.common.dao.user.UserDTO; import io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO; import io.hops.hopsworks.common.featurestore.statistics.StatisticsConfigDTO; @@ -41,7 +42,7 @@ @JsonSubTypes({ @JsonSubTypes.Type(value = FeaturegroupDTO.class, name = "FeaturegroupDTO"), @JsonSubTypes.Type(value = TrainingDatasetDTO.class, name = "TrainingDatasetDTO")}) -public abstract class FeaturestoreEntityDTO { +public abstract class FeaturestoreEntityDTO extends RestDTO { private Integer featurestoreId; private String featurestoreName; diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/activity/FeaturestoreActivityFacade.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/activity/FeaturestoreActivityFacade.java index 2bfef74f59..d5cbd372da 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/activity/FeaturestoreActivityFacade.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/activity/FeaturestoreActivityFacade.java @@ -23,6 +23,7 @@ import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup; import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.FeatureGroupCommit; import 
io.hops.hopsworks.persistence.entity.featurestore.featuregroup.datavalidation.FeatureGroupValidation; +import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; import io.hops.hopsworks.persistence.entity.featurestore.statistics.FeaturestoreStatistic; import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset; import io.hops.hopsworks.persistence.entity.jobs.history.Execution; @@ -65,10 +66,12 @@ public void logMetadataActivity(Users user, Featuregroup featuregroup, em.persist(fsActivity); } - public void logMetadataActivity(Users user, TrainingDataset trainingDataset, + public void logMetadataActivity(Users user, TrainingDataset trainingDataset, FeatureView featureView, FeaturestoreActivityMeta metadataType) { FeaturestoreActivity fsActivity = new FeaturestoreActivity(); fsActivity.setType(ActivityType.METADATA); + //TODO: set activity after merging of activityresource +// fsActivity.setfea fsActivity.setTrainingDataset(trainingDataset); fsActivity.setUser(user); fsActivity.setEventTime(new Date()); @@ -76,6 +79,17 @@ public void logMetadataActivity(Users user, TrainingDataset trainingDataset, em.persist(fsActivity); } + public void logMetadataActivity(Users user, FeatureView featureView, + FeaturestoreActivityMeta metadataType) { + FeaturestoreActivity fsActivity = new FeaturestoreActivity(); + fsActivity.setType(ActivityType.METADATA); + fsActivity.setFeatureView(featureView); + fsActivity.setUser(user); + fsActivity.setEventTime(new Date()); + fsActivity.setActivityMeta(metadataType); + em.persist(fsActivity); + } + public void logStatisticsActivity(Users user, Featuregroup featuregroup, Date eventTime, FeaturestoreStatistic statistics) { FeaturestoreActivity fsActivity = new FeaturestoreActivity(); @@ -229,6 +243,28 @@ public CollectionInfo findByTrainingDataset(TrainingDatase return new CollectionInfo((Long) queryCount.getSingleResult(), query.getResultList()); } + public CollectionInfo 
findByFeatureView(FeatureView featureView, Integer offset, + Integer limit, + Set filters, + Set sorts) { + + String queryStr = buildQuery("SELECT a FROM FeaturestoreActivity a ", + filters, sorts, "a.featureView = :featureView"); + String queryCountStr = buildQuery("SELECT COUNT(DISTINCT a.id) FROM FeaturestoreActivity a ", + filters, sorts, "a.featureView = :featureView"); + + Query query = em.createQuery(queryStr, FeaturestoreActivity.class) + .setParameter("featureView", featureView); + setFilters(filters, query); + setOffsetAndLim(offset, limit, query); + + Query queryCount = em.createQuery(queryCountStr, FeaturestoreActivity.class) + .setParameter("featureView", featureView); + setFilters(filters, queryCount); + + return new CollectionInfo((Long) queryCount.getSingleResult(), query.getResultList()); + } + private void setFilters(Set filters, Query query) { if (filters == null || filters.isEmpty()) { return; diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/app/FsJobManagerController.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/app/FsJobManagerController.java index b3304ced98..01da16b7ae 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/app/FsJobManagerController.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/app/FsJobManagerController.java @@ -23,7 +23,11 @@ import io.hops.hopsworks.common.featurestore.featuregroup.IngestionDataFormat; import io.hops.hopsworks.common.featurestore.featuregroup.IngestionJob; import io.hops.hopsworks.common.featurestore.featuregroup.stream.DeltaStreamerJobConf; +import io.hops.hopsworks.common.featurestore.query.Query; +import io.hops.hopsworks.common.featurestore.query.QueryBuilder; +import io.hops.hopsworks.common.featurestore.query.QueryController; import io.hops.hopsworks.common.featurestore.query.QueryDTO; +import io.hops.hopsworks.common.featurestore.trainingdatasets.TrainingDatasetController; 
import io.hops.hopsworks.common.hdfs.DistributedFileSystemOps; import io.hops.hopsworks.common.hdfs.DistributedFsService; import io.hops.hopsworks.common.hdfs.HdfsUsersController; @@ -42,6 +46,7 @@ import io.hops.hopsworks.persistence.entity.dataset.DatasetAccessPermission; import io.hops.hopsworks.persistence.entity.featurestore.Featurestore; import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup; +import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset; import io.hops.hopsworks.persistence.entity.jobs.configuration.JobType; import io.hops.hopsworks.persistence.entity.jobs.configuration.spark.SparkJobConfiguration; @@ -83,12 +88,19 @@ public class FsJobManagerController { private ExecutionController executionController; @EJB private Settings settings; + @EJB + private TrainingDatasetController trainingDatasetController; + @EJB + private QueryController queryController; + @EJB + private QueryBuilder queryBuilder; private ObjectMapper objectMapper = new ObjectMapper(); private SimpleDateFormat formatter = new SimpleDateFormat("ddMMyyyyHHmmss"); private final static String INSERT_FG_OP = "insert_fg"; private final static String TRAINING_DATASET_OP = "create_td"; + private final static String FEATURE_VIEW_TRAINING_DATASET_OP = "create_fv_td"; private final static String COMPUTE_STATS_OP = "compute_stats"; private final static String DELTA_STREAMER_OP = "offline_fg_backfill"; @@ -206,32 +218,65 @@ public Jobs setupStatisticsJob(Project project, Users user, Featurestore feature } } + public Jobs setupTrainingDatasetJob(Project project, Users user, FeatureView featureView, + Integer trainingDatasetVersion, + Boolean overwrite, Map writeOptions, SparkJobConfiguration sparkJobConfiguration) + throws FeaturestoreException, JobException, GenericException, ProjectException, ServiceException { + TrainingDataset trainingDataset = 
trainingDatasetController.getTrainingDatasetByFeatureViewAndVersion( + featureView, trainingDatasetVersion); + Query query = queryController.makeQuery(featureView, project, user, true, false); + QueryDTO queryDTO = queryBuilder.build(query, featureView.getFeaturestore(), project, user); + return setupTrainingDatasetJob(project, user, trainingDataset, queryDTO, overwrite, writeOptions, + sparkJobConfiguration, FEATURE_VIEW_TRAINING_DATASET_OP); + } + public Jobs setupTrainingDatasetJob(Project project, Users user, TrainingDataset trainingDataset, + QueryDTO queryDTO, Boolean overwrite, Map writeOptions, + SparkJobConfiguration sparkJobConfiguration) + throws FeaturestoreException, JobException, GenericException, ProjectException, ServiceException { + return setupTrainingDatasetJob(project, user, trainingDataset, queryDTO, overwrite, writeOptions, + sparkJobConfiguration, TRAINING_DATASET_OP); + } + + private Jobs setupTrainingDatasetJob(Project project, Users user, TrainingDataset trainingDataset, QueryDTO queryDTO, Boolean overwrite, Map writeOptions, - SparkJobConfiguration sparkJobConfiguration) + SparkJobConfiguration sparkJobConfiguration, String jobType) throws FeaturestoreException, JobException, GenericException, ProjectException, ServiceException { DistributedFileSystemOps udfso = dfs.getDfsOps(hdfsUsersController.getHdfsUserName(project, user)); try { - String jobConfigurationPath = - getJobConfigurationPath(project, trainingDataset.getName(), trainingDataset.getVersion(), "td"); + String jobConfigurationPath; Map jobConfiguration = new HashMap<>(); jobConfiguration.put("feature_store", featurestoreController.getOfflineFeaturestoreDbName(trainingDataset.getFeaturestore().getProject())); - jobConfiguration.put("name", trainingDataset.getName()); - jobConfiguration.put("version", String.valueOf(trainingDataset.getVersion())); - jobConfiguration.put("query", queryDTO); + if (trainingDataset.getFeatureView() != null) { + String featureViewName = 
trainingDataset.getFeatureView().getName(); + Integer featureViewVersion = trainingDataset.getFeatureView().getVersion(); + jobConfiguration.put("name", featureViewName); + jobConfiguration.put("version", String.valueOf(featureViewVersion)); + jobConfiguration.put("td_version", String.valueOf(trainingDataset.getVersion())); + jobConfigurationPath = + getJobConfigurationPath(project, featureViewName + "_" + featureViewVersion, + trainingDataset.getVersion(), "fv_td"); + } else { + jobConfiguration.put("name", trainingDataset.getName()); + jobConfiguration.put("version", String.valueOf(trainingDataset.getVersion())); + jobConfigurationPath = + getJobConfigurationPath(project, trainingDataset.getName(), trainingDataset.getVersion(), "td"); + // For FeatureView, query is constructed from scratch when launching the job. + jobConfiguration.put("query", queryDTO); + } jobConfiguration.put("write_options", writeOptions); jobConfiguration.put("overwrite", overwrite); String jobConfigurationStr = objectMapper.writeValueAsString(jobConfiguration); writeToHDFS(jobConfigurationPath, jobConfigurationStr, udfso); - String jobArgs = getJobArgs(TRAINING_DATASET_OP, jobConfigurationPath); + String jobArgs = getJobArgs(jobType, jobConfigurationPath); Jobs trainingDatasetJob = configureJob(user, project, sparkJobConfiguration, - getJobName(TRAINING_DATASET_OP, Utils.getTrainingDatasetName(trainingDataset), true), + getJobName(jobType, Utils.getTrainingDatasetName(trainingDataset), true), jobArgs, JobType.PYSPARK); executionController.start(trainingDatasetJob, jobArgs, user); diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featuregroup/FeaturegroupController.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featuregroup/FeaturegroupController.java index ae2dabafe2..5d4ca78741 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featuregroup/FeaturegroupController.java +++ 
b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featuregroup/FeaturegroupController.java @@ -335,7 +335,7 @@ public FeaturegroupDTO createFeaturegroupNoValidation(Featurestore featurestore, * @param featuregroup the entity to convert * @return a DTO representation of the entity */ - private FeaturegroupDTO convertFeaturegrouptoDTO(Featuregroup featuregroup, Project project, Users user) + public FeaturegroupDTO convertFeaturegrouptoDTO(Featuregroup featuregroup, Project project, Users user) throws FeaturestoreException, ServiceException { String featurestoreName = featurestoreFacade.getHiveDbName(featuregroup.getFeaturestore().getHiveDbId()); switch (featuregroup.getFeaturegroupType()) { diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featuregroup/FeaturegroupDTO.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featuregroup/FeaturegroupDTO.java index 4f1729b9a0..f276856c83 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featuregroup/FeaturegroupDTO.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featuregroup/FeaturegroupDTO.java @@ -48,7 +48,7 @@ @JsonSubTypes.Type(value = CachedFeaturegroupDTO.class, name = "CachedFeaturegroupDTO"), @JsonSubTypes.Type(value = StreamFeatureGroupDTO.class, name = "StreamFeatureGroupDTO"), @JsonSubTypes.Type(value = OnDemandFeaturegroupDTO.class, name = "OnDemandFeaturegroupDTO")}) -public class FeaturegroupDTO extends FeaturestoreEntityDTO { +public class FeaturegroupDTO extends FeaturestoreEntityDTO { @XmlElement private List features; diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featureview/FeatureViewDTO.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featureview/FeatureViewDTO.java index 3c160f2ced..e334284c72 100644 --- 
a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featureview/FeatureViewDTO.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featureview/FeatureViewDTO.java @@ -42,8 +42,6 @@ public class FeatureViewDTO extends RestDTO { private String name; private Integer id; private QueryDTO query; - //TODO featrue view: confirm type - private String label; private List features; // contains transformation info // Below fields do not need to provide @@ -134,14 +132,6 @@ public void setQueryString(FsQueryDTO queryString) { this.queryString = queryString; } - public String getLabel() { - return label; - } - - public void setLabel(String label) { - this.label = label; - } - public List getFeatures() { return features; } diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featureview/FeatureViewFacade.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featureview/FeatureViewFacade.java index b5d1165dfd..c16e62e9e2 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featureview/FeatureViewFacade.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/featureview/FeatureViewFacade.java @@ -50,13 +50,15 @@ public List findAll() { public List findByFeaturestore(Featurestore featurestore, QueryParam queryParam) { Boolean latestVersion = false; - if (queryParam.getFilters().removeIf(filter -> filter.toString().equals("LATEST_VERSION"))) { + if (queryParam != null && queryParam.getFilters().removeIf(filter -> filter.toString().equals("LATEST_VERSION"))) { latestVersion = true; } Map extraParam = new HashMap<>(); extraParam.put("featurestore", featurestore); - String queryStr = buildQuery("SELECT fv FROM FeatureView fv ", queryParam.getFilters(), queryParam.getSorts(), + String queryStr = buildQuery("SELECT fv FROM FeatureView fv ", + queryParam != null ? queryParam.getFilters(): null, + queryParam != null ? 
queryParam.getSorts(): null, "fv.featurestore = :featurestore "); Query q = makeQuery(queryStr, queryParam, extraParam); List results = q.getResultList(); diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/keyword/KeywordControllerIface.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/keyword/KeywordControllerIface.java index 93e3d9166c..7f3b16502c 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/keyword/KeywordControllerIface.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/keyword/KeywordControllerIface.java @@ -16,7 +16,6 @@ package io.hops.hopsworks.common.featurestore.keyword; -import io.hops.hopsworks.common.hdfs.DistributedFileSystemOps; import io.hops.hopsworks.exceptions.FeaturestoreException; import io.hops.hopsworks.exceptions.MetadataException; import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup; @@ -25,36 +24,24 @@ import io.hops.hopsworks.persistence.entity.project.Project; import io.hops.hopsworks.persistence.entity.user.Users; -import java.io.IOException; import java.util.List; public interface KeywordControllerIface { - default List getAll(Project project, Users user, - Featuregroup featureGroup, TrainingDataset trainingDataset) + default List getAll(Project project, Users user, Featuregroup featureGroup, TrainingDataset trainingDataset, + FeatureView featureView) throws FeaturestoreException, MetadataException { throw new IllegalArgumentException("API not supported in the community edition"); } - default List getAll(Featuregroup featureGroup, TrainingDataset trainingDataset, - DistributedFileSystemOps udfso) - throws IOException, MetadataException, FeaturestoreException { - throw new IllegalArgumentException("API not supported in the community edition"); - } - - default List getAll(Project project, Users user, FeatureView featureView) - throws IOException, MetadataException, FeaturestoreException 
{ - throw new IllegalArgumentException("API not supported in the community edition"); - } - default List replaceKeywords(Project project, Users user, Featuregroup featureGroup, - TrainingDataset trainingDataset, List keywords) + TrainingDataset trainingDataset, FeatureView featureView, List keywords) throws FeaturestoreException, MetadataException { throw new IllegalArgumentException("API not supported in the community edition"); } default List deleteKeywords(Project project, Users user, Featuregroup featureGroup, - TrainingDataset trainingDataset, List keywords) + TrainingDataset trainingDataset, FeatureView featureView, List keywords) throws FeaturestoreException, MetadataException { throw new IllegalArgumentException("API not supported in the community edition"); } diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/Feature.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/Feature.java index 6d614f1065..2849002947 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/Feature.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/Feature.java @@ -32,6 +32,7 @@ public class Feature { private Featuregroup featureGroup; private Integer idx; + // For testing purposes public Feature(String name, String fgAlias, String type, boolean primary, String defaultValue, String prefix) { this.name = name; this.fgAlias = fgAlias; diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/Query.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/Query.java index b8a4187cc5..86fef61eef 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/Query.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/Query.java @@ -37,8 +37,6 @@ public class Query { // to build the FROM part of the online query private String project; private 
Featuregroup featuregroup; - private String leftFeatureGroupStartTime; - private String leftFeatureGroupEndTime; private Long leftFeatureGroupStartTimestamp; private Long leftFeatureGroupEndTimestamp; private Long leftFeatureGroupEndCommitId; @@ -142,22 +140,6 @@ public void setFeaturegroup(Featuregroup featuregroup) { this.featuregroup = featuregroup; } - public String getLeftFeatureGroupStartTime() { - return leftFeatureGroupStartTime; - } - - public void setLeftFeatureGroupStartTime(String leftFeatureGroupStartTime) { - this.leftFeatureGroupStartTime = leftFeatureGroupStartTime; - } - - public String getLeftFeatureGroupEndTime() { - return leftFeatureGroupEndTime; - } - - public void setLeftFeatureGroupEndTime(String leftFeatureGroupEndTime) { - this.leftFeatureGroupEndTime = leftFeatureGroupEndTime; - } - public Long getLeftFeatureGroupStartTimestamp() { return leftFeatureGroupStartTimestamp; } diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/QueryBuilder.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/QueryBuilder.java new file mode 100644 index 0000000000..94c80d5397 --- /dev/null +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/QueryBuilder.java @@ -0,0 +1,237 @@ +/* + * This file is part of Hopsworks + * Copyright (C) 2022, Logical Clocks AB. All rights reserved + * + * Hopsworks is free software: you can redistribute it and/or modify it under the terms of + * the GNU Affero General Public License as published by the Free Software Foundation, + * either version 3 of the License, or (at your option) any later version. + * + * Hopsworks is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU Affero General Public License for more details. 
+ * + * You should have received a copy of the GNU Affero General Public License along with this program. + * If not, see . + */ + +package io.hops.hopsworks.common.featurestore.query; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import io.hops.hopsworks.common.featurestore.FeaturestoreController; +import io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO; +import io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController; +import io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO; +import io.hops.hopsworks.common.featurestore.query.filter.Filter; +import io.hops.hopsworks.common.featurestore.query.filter.FilterDTO; +import io.hops.hopsworks.common.featurestore.query.filter.FilterLogic; +import io.hops.hopsworks.common.featurestore.query.filter.FilterLogicDTO; +import io.hops.hopsworks.common.featurestore.query.join.Join; +import io.hops.hopsworks.common.featurestore.query.join.JoinDTO; +import io.hops.hopsworks.exceptions.FeaturestoreException; +import io.hops.hopsworks.exceptions.ServiceException; +import io.hops.hopsworks.persistence.entity.featurestore.Featurestore; +import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup; +import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition; +import io.hops.hopsworks.persistence.entity.project.Project; +import io.hops.hopsworks.persistence.entity.user.Users; +import io.hops.hopsworks.restutils.RESTCodes; + +import javax.ejb.EJB; +import javax.ejb.Stateless; +import javax.ejb.TransactionAttribute; +import javax.ejb.TransactionAttributeType; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import java.util.logging.Level; +import java.util.stream.Collectors; + +@Stateless +@TransactionAttribute(TransactionAttributeType.NEVER) +public class QueryBuilder { + + private static class FeatureSignature { + + public String featureName; + public Integer 
featureGroupId; + public Integer featureGroupVersion; + + public FeatureSignature(Feature feature) { + this.featureName = feature.getName(); + this.featureGroupId = feature.getFeatureGroup().getId(); + this.featureGroupVersion = feature.getFeatureGroup().getVersion(); + } + + public FeatureSignature(FeatureGroupFeatureDTO feature, Integer version) { + this.featureName = feature.getName(); + this.featureGroupId = feature.getFeatureGroupId(); + this.featureGroupVersion = version; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + FeatureSignature that = (FeatureSignature) o; + return featureName.equals(that.featureName) && featureGroupId.equals(that.featureGroupId) && + featureGroupVersion.equals(that.featureGroupVersion); + } + + @Override + public int hashCode() { + return Objects.hash(featureName, featureGroupId, featureGroupVersion); + } + } + + @EJB + private FeaturegroupController featuregroupController; + @EJB + private FeaturestoreController featurestoreController; + + public QueryBuilder() { + } + + public QueryDTO build(Query query, Featurestore featurestore, Project project, Users user) + throws FeaturestoreException, ServiceException { + // featureToDTO and allJoinedFeatures are set only once at the top level query. 
+ Map featureToDTO = makeFeatureToFeatureDTOMap(query, project, user); + + List allJoinedFeatures = Lists.newArrayList(); + for (Feature feature: query.getFeatures()) { + allJoinedFeatures.add(convertToFeatureDTO(feature, featureToDTO)); + } + return build(query, featurestore, project, user, featureToDTO, allJoinedFeatures); + } + + private QueryDTO build(Query query, Featurestore featurestore, Project project, Users user, + Map featureToDTO, List allJoinedFeatures) + throws FeaturestoreException, ServiceException { + + QueryDTO queryDTO = new QueryDTO(); + String featureStoreName = query.getFeatureStore(); + // featureStoreId has to match with featureStoreName. + // Query's featureStoreName may not be the same as current feature store name in the case of shared project. + Integer featureStoreId = featurestoreController.getFeaturestoreForProjectWithName(project, featureStoreName) + .getFeaturestoreId(); + FeaturegroupDTO leftFeatureGroup = + featuregroupController.convertFeaturegrouptoDTO(query.getFeaturegroup(), project, user); + leftFeatureGroup.setFeatures(featuregroupController.getFeatures(query.getFeaturegroup(), project, user)); + Long leftFeatureGroupStartTime = query.getLeftFeatureGroupStartTimestamp(); + Long leftFeatureGroupEndTime = query.getLeftFeatureGroupEndTimestamp(); + FilterLogicDTO filter = convertToFilterLogicDTO(query.getFilter(), featureToDTO); + Boolean hiveEngine = query.getHiveEngine(); + List joins = convertToJoinDTOs( + query.getJoins(), featurestore, project, user, featureToDTO, allJoinedFeatures); + queryDTO.setFeatureStoreId(featureStoreId); + queryDTO.setFeatureStoreName(featureStoreName); + queryDTO.setLeftFeatureGroup(leftFeatureGroup); + queryDTO.setLeftFeatures( + allJoinedFeatures.stream() + // Select features which belong to the same feature group as the current query only. 
+ .filter(feature -> feature.getFeatureGroupId().equals(query.getFeaturegroup().getId())) + .collect(Collectors.toList()) + ); + queryDTO.setLeftFeatureGroupStartTime(leftFeatureGroupStartTime); + queryDTO.setLeftFeatureGroupEndTime(leftFeatureGroupEndTime); + queryDTO.setFilter(filter); + queryDTO.setHiveEngine(hiveEngine); + queryDTO.setJoins(joins); + return queryDTO; + } + + Map makeFeatureToFeatureDTOMap(Query query, Project project, Users user) + throws FeaturestoreException { + Map featureToDTO = Maps.newHashMap(); + // Cannot use set because Featuregroup.equals create infinite loop when comparing statisticsConfig + Map featuregroups = query.getJoins() + .stream().map(join -> join.getRightQuery().getFeaturegroup()) + .collect(Collectors.toMap(Featuregroup::getId, fg -> fg, (f1, f2) -> f1)); + featuregroups.put(query.getFeaturegroup().getId(), query.getFeaturegroup()); + for (Featuregroup featuregroup : featuregroups.values()) { + featuregroupController + .getFeatures(featuregroup, project, user) + .forEach(featureDto -> + featureToDTO.put(new FeatureSignature(featureDto, featuregroup.getVersion()), featureDto)); + } + return featureToDTO; + } + + FilterLogicDTO convertToFilterLogicDTO(FilterLogic filterLogic, + Map featureToDTO) throws FeaturestoreException { + if (filterLogic == null) { + return null; + } + FilterLogicDTO filterLogicDTO = new FilterLogicDTO(filterLogic.getType()); + filterLogicDTO.setLeftFilter(convertToFilterDTO(filterLogic.getLeftFilter(), featureToDTO)); + filterLogicDTO.setRightFilter(convertToFilterDTO(filterLogic.getRightFilter(), featureToDTO)); + filterLogicDTO.setLeftLogic(convertToFilterLogicDTO(filterLogic.getLeftLogic(), featureToDTO)); + filterLogicDTO.setRightLogic(convertToFilterLogicDTO(filterLogic.getRightLogic(), featureToDTO)); + return filterLogicDTO; + } + + FilterDTO convertToFilterDTO(Filter filter, Map featureToDTO) + throws FeaturestoreException { + if (filter == null) { + return null; + } + FeatureGroupFeatureDTO
feature = convertToFeatureDTO(filter.getFeatures().get(0), featureToDTO); + SqlCondition condition = filter.getCondition(); + String value = filter.getValue().getValue(); + return new FilterDTO(feature, condition, value); + } + + FeatureGroupFeatureDTO convertToFeatureDTO(Feature feature, + Map featureToDTO) throws FeaturestoreException { + FeatureGroupFeatureDTO featureDTO = featureToDTO.get(new FeatureSignature(feature)); + if (featureDTO == null) { + throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURE_NOT_FOUND, + Level.SEVERE, feature.getName() + " is not found"); + } + return featureDTO; + } + + List convertToJoinDTOs(List joins, Featurestore featurestore, Project project, Users user, + Map featureToDTO, List allJoinedFeatures) + throws FeaturestoreException, ServiceException { + if (joins == null || joins.isEmpty()) { + return null; + } + List joinDTOS = Lists.newArrayList(); + for (Join join : joins) { + joinDTOS.add(convertToJoinDTO(join, featurestore, project, user, featureToDTO, allJoinedFeatures)); + } + return joinDTOS; + } + + JoinDTO convertToJoinDTO(Join join, Featurestore featurestore, Project project, Users user, + Map featureToDTO, List allJoinedFeatures) + throws FeaturestoreException, ServiceException { + JoinDTO joinDTO = new JoinDTO(); + QueryDTO queryDTO = build(join.getRightQuery(), featurestore, project, user, featureToDTO, allJoinedFeatures); + List rightOn = join.getRightOn() + .stream().map(feature -> { + // Features in join condition only contain name. 
+ FeatureGroupFeatureDTO featureDTO = new FeatureGroupFeatureDTO(); + featureDTO.setName(feature.getName()); + return featureDTO; + }).collect(Collectors.toList()); + List leftOn = join.getLeftOn() + .stream().map(feature -> { + FeatureGroupFeatureDTO featureDTO = new FeatureGroupFeatureDTO(); + featureDTO.setName(feature.getName()); + return featureDTO; + }).collect(Collectors.toList()); + joinDTO.setQuery(queryDTO); + joinDTO.setRightOn(rightOn); + joinDTO.setLeftOn(leftOn); + joinDTO.setType(join.getJoinType()); + joinDTO.setPrefix(join.getPrefix()); + return joinDTO; + } +} diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/QueryController.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/QueryController.java index 988807f09b..315e6cd5dc 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/QueryController.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/QueryController.java @@ -24,14 +24,21 @@ import io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupFacade; import io.hops.hopsworks.common.featurestore.featuregroup.cached.FeatureGroupCommitController; import io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController; +import io.hops.hopsworks.common.featurestore.query.filter.Filter; import io.hops.hopsworks.common.featurestore.query.filter.FilterController; +import io.hops.hopsworks.common.featurestore.query.filter.FilterLogic; import io.hops.hopsworks.common.featurestore.query.join.Join; import io.hops.hopsworks.common.featurestore.query.join.JoinDTO; +import io.hops.hopsworks.common.featurestore.trainingdatasets.TrainingDatasetController; import io.hops.hopsworks.exceptions.FeaturestoreException; import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup; import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.FeatureGroupCommit; import 
io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.TimeTravelFormat; +import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition; +import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlFilterLogic; +import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature; +import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin; import io.hops.hopsworks.persistence.entity.project.Project; import io.hops.hopsworks.persistence.entity.user.Users; import io.hops.hopsworks.restutils.RESTCodes; @@ -42,6 +49,8 @@ import javax.ejb.TransactionAttribute; import javax.ejb.TransactionAttributeType; import java.util.ArrayList; +import java.util.Comparator; +import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -49,6 +58,8 @@ import java.util.logging.Level; import java.util.stream.Collectors; +import static io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlFilterLogic.AND; + @Stateless @TransactionAttribute(TransactionAttributeType.NEVER) public class QueryController { @@ -65,6 +76,8 @@ public class QueryController { private FeaturestoreFacade featurestoreFacade; @EJB private OnlineFeaturestoreController onlineFeaturestoreController; + @EJB + private TrainingDatasetController trainingDatasetController; private final static String ALL_FEATURES = "*"; public QueryController() { @@ -104,6 +117,7 @@ public Query convertQueryDTO(QueryDTO queryDTO, Map fgAliasLook Map fgLookup, Map> availableFeatureLookup, boolean pitEnabled) throws FeaturestoreException { + checkNestedJoin(queryDTO); Integer fgId = queryDTO.getLeftFeatureGroup().getId(); Featuregroup fg = fgLookup.get(fgId); @@ -160,6 +174,60 @@ public Query convertQueryDTO(QueryDTO queryDTO, Map fgAliasLook return query; } + void checkNestedJoin(QueryDTO queryDTO) 
throws FeaturestoreException { + if (queryDTO.getJoins() != null) { + for (JoinDTO join : queryDTO.getJoins()) { + if (join.getQuery().getJoins() != null && join.getQuery().getJoins().size() > 0) { + throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.NESTED_JOIN_NOT_ALLOWED, + Level.SEVERE, + "Nested join is not supported."); + } + } + } + } + + public Query appendFilter(Query query, SqlFilterLogic sqlLogic, FilterLogic filterLogic) { + FilterLogic currentFilter = query.getFilter(); + if(currentFilter != null) { + FilterLogic filter = new FilterLogic(sqlLogic, currentFilter, filterLogic); + query.setFilter(filter); + } else { + query.setFilter(filterLogic); + } + return query; + } + + public Query appendEventTimeFilter(Query query, Date startTime, Date endTime) throws FeaturestoreException { + query = appendEventTimeFilter(query, startTime, SqlCondition.GREATER_THAN_OR_EQUAL); + return appendEventTimeFilter(query, endTime, SqlCondition.LESS_THAN_OR_EQUAL); + } + + private Query appendEventTimeFilter(Query query, Date eventTime, SqlCondition sqlCondition) + throws FeaturestoreException { + if (eventTime != null) { + Filter eventTimeFilter = createEventTimeFilter(getEventTimeFeature(query), sqlCondition, eventTime); + return appendFilter(query, AND, new FilterLogic(eventTimeFilter)); + } + return query; + } + + Feature getEventTimeFeature(Query query) throws FeaturestoreException { + String eventTimeFieldName = query.getFeaturegroup().getEventTime(); + if (eventTimeFieldName == null) { + throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.EVENT_TIME_FEATURE_NOT_FOUND, Level.FINE, + "Cannot find event feature in feature group " + query.getFeaturegroup().getName()); + } + return query.getAvailableFeatures() + .stream() + .filter(feature -> feature.getName().equals(eventTimeFieldName)) + .findFirst() + .orElseThrow(IllegalStateException::new); + } + + Filter createEventTimeFilter(Feature feature, SqlCondition condition, Date time) throws 
FeaturestoreException { + String value = filterController.convertToEventTimeFeatureValue(feature, time); + return new Filter(feature, condition, value); + } public int populateFgLookupTables(QueryDTO queryDTO, int fgId, Map fgAliasLookup, Map fgLookup, @@ -412,4 +480,41 @@ void removeDuplicateColumns(Query query, boolean pitEnabled) { } } + public Query makeQuery(FeatureView featureView, Project project, Users user, boolean withLabel, Boolean isHiveEngine) + throws FeaturestoreException { + List joins = featureView.getJoins().stream() + .sorted(Comparator.comparing(TrainingDatasetJoin::getIndex)) + .collect(Collectors.toList()); + + List tdFeatures = featureView.getFeatures().stream() + .sorted((t1, t2) -> { + if (t1.getIndex() != null) { + // compare based on index + return t1.getIndex().compareTo(t2.getIndex()); + } else { + // Old training dataset with no index. compare based on name + return t1.getName().compareTo(t2.getName()); + } + }) + // drop label features if desired + .filter(f -> !f.isLabel() || withLabel) + .collect(Collectors.toList()); + + return trainingDatasetController.getQuery(joins, tdFeatures, featureView.getFilters(), project, user, isHiveEngine); + } + + public Query constructBatchQuery(FeatureView featureView, Project project, Users user, Long startTimestamp, + Long endTimestamp, Boolean withLabel, Boolean isHiveEngine) + throws FeaturestoreException { + Date startTime = startTimestamp == null ? null : new Date(startTimestamp); + Date endTime = endTimestamp == null ? 
null : new Date(endTimestamp); + return constructBatchQuery(featureView, project, user, startTime, endTime, withLabel, isHiveEngine); + } + + public Query constructBatchQuery(FeatureView featureView, Project project, Users user, Date startTime, + Date endTime, Boolean withLabel, Boolean isHiveEngine) + throws FeaturestoreException { + Query baseQuery = makeQuery(featureView, project, user, withLabel, isHiveEngine); + return appendEventTimeFilter(baseQuery, startTime, endTime); + } } diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/filter/FilterController.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/filter/FilterController.java index 79adf3fdcb..a8ccce5962 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/filter/FilterController.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/query/filter/FilterController.java @@ -42,8 +42,11 @@ import javax.ejb.TransactionAttribute; import javax.ejb.TransactionAttributeType; import java.io.IOException; +import java.text.DateFormat; +import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; +import java.util.Date; import java.util.List; import java.util.Map; import java.util.Optional; @@ -57,7 +60,9 @@ public class FilterController { private ConstructorController constructorController; private ObjectMapper objectMapper = new ObjectMapper(); - + private DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); + private DateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss"); + public FilterController() {} // for testing @@ -86,6 +91,17 @@ public FilterLogic convertFilterLogic(FilterLogicDTO filterLogicDTO, Map generateSubQueries(Query baseQuery, Query query, boolean is // we always re-select all primary key columns of the "label group" in order to be able to perform final join List additionalPkFeatures = 
query.getAvailableFeatures().stream().filter(Feature::isPrimary) .map(f -> - new Feature(f.getName(), f.getFgAlias(), f.getType(), f.isPrimary(), f.getDefaultValue(), PK_JOIN_PREFIX)) + new Feature(f.getName(), f.getFgAlias(), f.getType(), f.isPrimary(), f.getDefaultValue(), PK_JOIN_PREFIX, + f.getFeatureGroup())) .collect(Collectors.toList()); additionalPkFeatures.add(new Feature(query.getFeaturegroup().getEventTime(), query.getAs(), (String) null, null, EVT_JOIN_PREFIX)); diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/statistics/columns/StatisticColumnController.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/statistics/columns/StatisticColumnController.java index e91ec711f2..9dc261fe85 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/statistics/columns/StatisticColumnController.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/statistics/columns/StatisticColumnController.java @@ -119,9 +119,14 @@ public void verifyStatisticColumnsExist(FeaturegroupDTO featureGroupDTO, Feature public void verifyStatisticColumnsExist(TrainingDatasetDTO trainingDatasetDTO, TrainingDataset trainingDataset) throws FeaturestoreException { - verifyStatisticColumnsExist(trainingDatasetDTO.getStatisticsConfig().getColumns(), - trainingDataset.getFeatures().stream().map(TrainingDatasetFeature::getName).collect(Collectors.toList()), - "training dataset " + trainingDatasetDTO.getName(), trainingDatasetDTO.getVersion()); + List featureNames = trainingDataset.getFeatures().stream() + .map(TrainingDatasetFeature::getName).collect(Collectors.toList()); + if (trainingDataset.getFeatureView() != null) { + featureNames.addAll(trainingDataset.getFeatureView().getFeatures().stream() + .map(TrainingDatasetFeature::getName).collect(Collectors.toList())); + } + verifyStatisticColumnsExist(trainingDatasetDTO.getStatisticsConfig().getColumns(), featureNames, + "training dataset " 
+ trainingDatasetDTO.getName(), trainingDatasetDTO.getVersion()); } public void verifyStatisticColumnsExist(TrainingDatasetDTO trainingDatasetDTO, Query query) diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/DateAdapter.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/DateAdapter.java new file mode 100644 index 0000000000..e37d3e62b0 --- /dev/null +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/DateAdapter.java @@ -0,0 +1,33 @@ +/* + * This file is part of Hopsworks + * Copyright (C) 2022, Logical Clocks AB. All rights reserved + * + * Hopsworks is free software: you can redistribute it and/or modify it under the terms of + * the GNU Affero General Public License as published by the Free Software Foundation, + * either version 3 of the License, or (at your option) any later version. + * + * Hopsworks is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License along with this program. + * If not, see . 
+ */ + +package io.hops.hopsworks.common.featurestore.trainingdatasets; + +import javax.xml.bind.annotation.adapters.XmlAdapter; +import java.util.Date; + +public class DateAdapter extends XmlAdapter { + + @Override + public Long marshal(Date v) { + return v.getTime(); + } + + @Override + public Date unmarshal(Long v) { + return new Date(v); + } +} diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetController.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetController.java index dbe2992d95..04bfe66b47 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetController.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetController.java @@ -17,11 +17,13 @@ package io.hops.hopsworks.common.featurestore.trainingdatasets; import com.logicalclocks.shaded.com.google.common.collect.Streams; +import io.hops.hopsworks.common.dao.user.activity.ActivityFacade; import io.hops.hopsworks.common.featurestore.FeaturestoreFacade; import io.hops.hopsworks.common.featurestore.activity.FeaturestoreActivityFacade; import io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO; import io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController; import io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO; +import io.hops.hopsworks.common.featurestore.featuregroup.cached.FeatureGroupCommitController; import io.hops.hopsworks.common.featurestore.featuregroup.online.OnlineFeaturegroupController; import io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController; import io.hops.hopsworks.common.featurestore.query.Feature; @@ -56,6 +58,7 @@ import io.hops.hopsworks.persistence.entity.featurestore.activity.FeaturestoreActivityMeta; import 
io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup; import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.FeaturegroupType; +import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.FeatureGroupCommit; import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.TimeTravelFormat; import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; import io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn; @@ -77,8 +80,11 @@ import io.hops.hopsworks.persistence.entity.hdfs.inode.Inode; import io.hops.hopsworks.persistence.entity.project.Project; import io.hops.hopsworks.persistence.entity.user.Users; +import io.hops.hopsworks.persistence.entity.user.activity.ActivityFlag; import io.hops.hopsworks.restutils.RESTCodes; import org.apache.calcite.sql.JoinType; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; import javax.ejb.EJB; import javax.ejb.Stateless; @@ -146,6 +152,10 @@ public class TrainingDatasetController { private PitJoinController pitJoinController; @EJB private QueryController queryController; + @EJB + private ActivityFacade activityFacade; + @EJB + private FeatureGroupCommitController featureGroupCommitCommitController; /** * Gets all trainingDatasets for a particular featurestore and project @@ -174,32 +184,52 @@ public List getTrainingDatasetsForFeaturestore(Users user, P * @throws ServiceException * @throws FeaturestoreException */ - private TrainingDatasetDTO convertTrainingDatasetToDTO(Users user, Project project, TrainingDataset trainingDataset) + public TrainingDatasetDTO convertTrainingDatasetToDTO(Users user, Project project, TrainingDataset trainingDataset) + throws ServiceException, FeaturestoreException { + return convertTrainingDatasetToDTO(user, project, trainingDataset, false); + } + + /** + * Converts a trainingDataset entity to a TrainingDataset DTO + * + * @param user + * @param project + * @param 
trainingDataset trainingDataset entity + * @param skipFeature do not include feature + * @return JSON/XML DTO of the trainingDataset + * @throws ServiceException + * @throws FeaturestoreException + */ + public TrainingDatasetDTO convertTrainingDatasetToDTO(Users user, Project project, TrainingDataset trainingDataset, + Boolean skipFeature) throws ServiceException, FeaturestoreException { TrainingDatasetDTO trainingDatasetDTO = new TrainingDatasetDTO(trainingDataset); String featurestoreName = featurestoreFacade.getHiveDbName(trainingDataset.getFeaturestore().getHiveDbId()); trainingDatasetDTO.setFeaturestoreName(featurestoreName); - // Set features - List tdFeatures = getFeaturesSorted(trainingDataset, true); - Map fsLookupTable = getFsLookupTableFeatures(tdFeatures); - trainingDatasetDTO.setFeatures(tdFeatures - .stream() - .map(f -> new TrainingDatasetFeatureDTO(checkPrefix(f), f.getType(), - f.getFeatureGroup() != null ? - new FeaturegroupDTO(f.getFeatureGroup().getFeaturestore().getId(), - fsLookupTable.get(f.getFeatureGroup().getFeaturestore().getId()), - f.getFeatureGroup().getId(), f.getFeatureGroup().getName(), - f.getFeatureGroup().getVersion(), - onlineFeaturegroupController.onlineFeatureGroupTopicName(project.getId(), - f.getFeatureGroup().getId(), Utils.getFeaturegroupName(f.getFeatureGroup()))) - : null, - f.getIndex(), f.isLabel())) - .collect(Collectors.toList())); + if (!skipFeature) { + // Set features + List tdFeatures = getFeaturesSorted(trainingDataset, true); + Map fsLookupTable = getFsLookupTableFeatures(tdFeatures); + trainingDatasetDTO.setFeatures(tdFeatures + .stream() + .map(f -> new TrainingDatasetFeatureDTO(checkPrefix(f), f.getType(), + f.getFeatureGroup() != null ? 
+ new FeaturegroupDTO(f.getFeatureGroup().getFeaturestore().getId(), + fsLookupTable.get(f.getFeatureGroup().getFeaturestore().getId()), + f.getFeatureGroup().getId(), f.getFeatureGroup().getName(), + f.getFeatureGroup().getVersion(), + onlineFeaturegroupController.onlineFeatureGroupTopicName(project.getId(), + f.getFeatureGroup().getId(), Utils.getFeaturegroupName(f.getFeatureGroup()))) + : null, + f.getIndex(), f.isLabel())) + .collect(Collectors.toList())); + } switch (trainingDataset.getTrainingDatasetType()) { case HOPSFS_TRAINING_DATASET: + case IN_MEMORY_TRAINING_DATASET: return hopsfsTrainingDatasetController.convertHopsfsTrainingDatasetToDTO(trainingDatasetDTO, trainingDataset); case EXTERNAL_TRAINING_DATASET: return externalTrainingDatasetController.convertExternalTrainingDatasetToDTO(user, project, @@ -213,14 +243,46 @@ private TrainingDatasetDTO convertTrainingDatasetToDTO(Users user, Project proje } public TrainingDatasetDTO createTrainingDataset(Users user, Project project, Featurestore featurestore, - TrainingDatasetDTO trainingDatasetDTO) + FeatureView featureView, TrainingDatasetDTO trainingDatasetDTO) + throws FeaturestoreException, ProvenanceException, IOException, ServiceException { + // Name of Training data = _, version is needed + // because there can be multiple training dataset of same name from different version of feature view + trainingDatasetDTO.setName(featureView.getName() + "_" + featureView.getVersion()); + Query query = queryController.makeQuery(featureView, project, user, true, false); + + return createTrainingDataset(user, project, featurestore, featureView, trainingDatasetDTO, query, true); + } + + public TrainingDatasetDTO createTrainingDataset(Users user, Project project, Featurestore featurestore, + TrainingDatasetDTO trainingDatasetDTO) + throws FeaturestoreException, ProvenanceException, IOException, ServiceException { + + // If the training dataset is constructed from a query, verify that it compiles correctly + Query 
query = null; + if (trainingDatasetDTO.getQueryDTO() != null) { + query = constructQuery(trainingDatasetDTO.getQueryDTO(), project, user); + } else if (trainingDatasetDTO.getFeatures() == null) { + throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NO_SCHEMA, + Level.FINE, "The training dataset doesn't have any feature"); + } + return createTrainingDataset(user, project, featurestore, null, trainingDatasetDTO, query, false); + } + + + private TrainingDatasetDTO createTrainingDataset(Users user, Project project, Featurestore featurestore, + FeatureView featureView, TrainingDatasetDTO trainingDatasetDTO, Query query, Boolean skipFeature) throws FeaturestoreException, ProvenanceException, IOException, ServiceException { // if version not provided, get latest and increment if (trainingDatasetDTO.getVersion() == null) { // returns ordered list by desc version - List tdPrevious = trainingDatasetFacade.findByNameAndFeaturestoreOrderedDescVersion( - trainingDatasetDTO.getName(), featurestore); + List tdPrevious; + if (featureView != null) { + tdPrevious = trainingDatasetFacade.findByFeatureViewAndVersionOrderedDescVersion(featureView); + } else { + tdPrevious = trainingDatasetFacade.findByNameAndFeaturestoreOrderedDescVersion( + trainingDatasetDTO.getName(), featurestore); + } if (tdPrevious != null && !tdPrevious.isEmpty()) { trainingDatasetDTO.setVersion(tdPrevious.get(0).getVersion() + 1); } else { @@ -229,28 +291,22 @@ public TrainingDatasetDTO createTrainingDataset(Users user, Project project, Fea } // Check that training dataset doesn't already exists - if (trainingDatasetFacade.findByNameVersionAndFeaturestore - (trainingDatasetDTO.getName(), trainingDatasetDTO.getVersion(), featurestore) - .isPresent()) { + if ((featureView != null && trainingDatasetFacade.findByFeatureViewAndVersionNullable( + featureView, trainingDatasetDTO.getVersion()).isPresent()) || + (featureView == null && trainingDatasetFacade.findByNameVersionAndFeaturestore( 
+ trainingDatasetDTO.getName(), trainingDatasetDTO.getVersion(), featurestore).isPresent()) + ) { throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_ALREADY_EXISTS, Level.FINE, "Training Dataset: " + trainingDatasetDTO.getName() + ", version: " + trainingDatasetDTO.getVersion()); } - // If the training dataset is constructed from a query, verify that it compiles correctly - Query query = null; - if (trainingDatasetDTO.getQueryDTO() != null) { - query = constructQuery(trainingDatasetDTO.getQueryDTO(), project, user); - } else if (trainingDatasetDTO.getFeatures() == null) { - throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NO_SCHEMA, - Level.FINE, "The training dataset doesn't have any feature"); - } - // Verify input inputValidation.validate(trainingDatasetDTO, query); Inode inode = null; FeaturestoreConnector featurestoreConnector; - if(trainingDatasetDTO.getTrainingDatasetType() == TrainingDatasetType.HOPSFS_TRAINING_DATASET) { + TrainingDatasetDTO completeTrainingDatasetDTO; + if (trainingDatasetDTO.getTrainingDatasetType() == TrainingDatasetType.HOPSFS_TRAINING_DATASET) { if (trainingDatasetDTO.getStorageConnector() != null && trainingDatasetDTO.getStorageConnector().getId() != null) { featurestoreConnector = featurestoreConnectorFacade @@ -260,7 +316,7 @@ public TrainingDatasetDTO createTrainingDataset(Users user, Project project, Fea } else { featurestoreConnector = getDefaultHopsFSTrainingDatasetConnector(featurestore); } - } else { + } else if (trainingDatasetDTO.getTrainingDatasetType() == TrainingDatasetType.EXTERNAL_TRAINING_DATASET) { if (trainingDatasetDTO.getStorageConnector() == null) { throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.CONNECTOR_NOT_FOUND, Level.FINE, "Storage connector is empty"); @@ -270,52 +326,58 @@ public TrainingDatasetDTO createTrainingDataset(Users user, Project project, Fea .findById(trainingDatasetDTO.getStorageConnector().getId()) .orElseThrow(() 
-> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.CONNECTOR_NOT_FOUND, Level.FINE, "Connector: " + trainingDatasetDTO.getStorageConnector().getId())); + } else { + featurestoreConnector = getDefaultHopsFSTrainingDatasetConnector(featurestore); } - + // for HopsFS TD it will either be the default connector already or it will be a connector pointing to another // HopsFS Directory - // for external TD we will use default connector + // for external TD or in-memory TD we will use default connector Dataset trainingDatasetsFolder; if (featurestoreConnector.getHopsfsConnector() != null) { trainingDatasetsFolder = featurestoreConnector.getHopsfsConnector().getHopsfsDataset(); } else { trainingDatasetsFolder = - getDefaultHopsFSTrainingDatasetConnector(featurestore).getHopsfsConnector().getHopsfsDataset(); + getDefaultHopsFSTrainingDatasetConnector(featurestore).getHopsfsConnector().getHopsfsDataset(); } - + // TODO(Fabio) account for path // we allow specifying the path in the training dataset dir, but it is not really used, this option will be // deprecated for hopsfs training datasets. 
String trainingDatasetPath = getTrainingDatasetPath( - inodeController.getPath(trainingDatasetsFolder.getInode()), - trainingDatasetDTO.getName(), trainingDatasetDTO.getVersion()); - + inodeController.getPath(trainingDatasetsFolder.getInode()), + trainingDatasetDTO.getName(), trainingDatasetDTO.getVersion()); + DistributedFileSystemOps udfso = null; String username = hdfsUsersBean.getHdfsUserName(project, user); try { udfso = dfs.getDfsOps(username); udfso.mkdir(trainingDatasetPath); - + inode = inodeController.getInodeAtPath(trainingDatasetPath); - TrainingDatasetDTO completeTrainingDatasetDTO = createTrainingDatasetMetadata(user, project, - featurestore, trainingDatasetDTO, query, featurestoreConnector, inode); - fsProvenanceController.trainingDatasetAttachXAttr(trainingDatasetPath, completeTrainingDatasetDTO, udfso); + completeTrainingDatasetDTO = createTrainingDatasetMetadata(user, project, + featurestore, featureView, trainingDatasetDTO, query, featurestoreConnector, inode, skipFeature); + if (featureView == null) { + //TODO: do provenance when creating feature view instead + fsProvenanceController.trainingDatasetAttachXAttr(trainingDatasetPath, completeTrainingDatasetDTO, udfso); + } + activityFacade.persistActivity(ActivityFacade.CREATED_TRAINING_DATASET + + completeTrainingDatasetDTO.getName(), project, user, ActivityFlag.SERVICE); return completeTrainingDatasetDTO; } finally { if (udfso != null) { dfs.closeDfsClient(udfso); } } - } - + private FeaturestoreConnector getDefaultHopsFSTrainingDatasetConnector(Featurestore featurestore) throws FeaturestoreException { String connectorName = - featurestore.getProject().getName() + "_" + Settings.ServiceDataset.TRAININGDATASETS.getName(); + featurestore.getProject().getName() + "_" + Settings.ServiceDataset.TRAININGDATASETS.getName(); return featurestoreConnectorFacade.findByFeaturestoreName(featurestore, connectorName) - .orElseThrow(() -> new 
FeaturestoreException(RESTCodes.FeaturestoreErrorCode.HOPSFS_CONNECTOR_NOT_FOUND, - Level.FINE, "HOPSFS Connector: " + connectorName)); + .orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.HOPSFS_CONNECTOR_NOT_FOUND, + Level.FINE, "HOPSFS Connector: " + connectorName)); } /** @@ -323,14 +385,16 @@ private FeaturestoreConnector getDefaultHopsFSTrainingDatasetConnector(Featurest */ @TransactionAttribute(TransactionAttributeType.REQUIRED) private TrainingDatasetDTO createTrainingDatasetMetadata(Users user, Project project, Featurestore featurestore, - TrainingDatasetDTO trainingDatasetDTO, Query query, - FeaturestoreConnector featurestoreConnector, Inode inode) + FeatureView featureView, TrainingDatasetDTO trainingDatasetDTO, Query query, + FeaturestoreConnector featurestoreConnector, Inode inode, Boolean skipFeature) throws FeaturestoreException, ServiceException { //Create specific dataset type HopsfsTrainingDataset hopsfsTrainingDataset = null; ExternalTrainingDataset externalTrainingDataset = null; switch (trainingDatasetDTO.getTrainingDatasetType()) { case HOPSFS_TRAINING_DATASET: + case IN_MEMORY_TRAINING_DATASET: + // inode is required for keyword and tag support, even for an in-memory training dataset hopsfsTrainingDataset = hopsfsTrainingDatasetFacade.createHopsfsTrainingDataset(featurestoreConnector, inode); break; @@ -347,6 +411,14 @@ private TrainingDatasetDTO createTrainingDatasetMetadata(Users user, Project pro //Store trainingDataset metadata in Hopsworks TrainingDataset trainingDataset = new TrainingDataset(); + trainingDataset.setFeatureView(featureView); + if (trainingDatasetDTO.getEventStartTime() != null) { + trainingDataset.setStartTime(trainingDatasetDTO.getEventStartTime()); + } + if (trainingDatasetDTO.getEventEndTime() != null) { + trainingDataset.setEndTime(trainingDatasetDTO.getEventEndTime()); + } + trainingDataset.setSampleRatio(trainingDatasetDTO.getSampleRatio()); trainingDataset.setName(trainingDatasetDTO.getName());
trainingDataset.setHopsfsTrainingDataset(hopsfsTrainingDataset); trainingDataset.setExternalTrainingDataset(externalTrainingDataset); @@ -373,21 +445,24 @@ private TrainingDatasetDTO createTrainingDatasetMetadata(Users user, Project pro trainingDataset.setStatisticsConfig(statisticsConfig); trainingDataset.setTrainSplit(trainingDatasetDTO.getTrainSplit()); - // set features/query - trainingDataset.setQuery(trainingDatasetDTO.getQueryDTO() != null); - if (trainingDataset.isQuery()) { - setTrainingDatasetQuery(query, trainingDatasetDTO.getFeatures(), trainingDataset); - } else { - trainingDataset.setFeatures(getTrainingDatasetFeatures(trainingDatasetDTO.getFeatures(), trainingDataset)); + // Query info is stored in FeatureView instead of TrainingDataset + if (featureView == null) { + // set features/query + trainingDataset.setQuery(trainingDatasetDTO.getQueryDTO() != null); + if (trainingDataset.isQuery()) { + setTrainingDatasetQuery(query, trainingDatasetDTO.getFeatures(), trainingDataset); + } else if (trainingDatasetDTO.getFeatures() != null) { + trainingDataset.setFeatures(getTrainingDatasetFeatures(trainingDatasetDTO.getFeatures(), trainingDataset)); + } } TrainingDataset dbTrainingDataset = trainingDatasetFacade.update(trainingDataset); // Log the metadata operation - fsActivityFacade.logMetadataActivity(user, dbTrainingDataset, FeaturestoreActivityMeta.TD_CREATED); + fsActivityFacade.logMetadataActivity(user, dbTrainingDataset, featureView, FeaturestoreActivityMeta.TD_CREATED); //Get final entity from the database - return convertTrainingDatasetToDTO(user, project, dbTrainingDataset); + return convertTrainingDatasetToDTO(user, project, dbTrainingDataset, skipFeature); } @@ -416,8 +491,18 @@ private void setTrainingDatasetQuery(Query query, trainingDataset.setFilters(filters); } - List convertToFilterEntities(FilterLogic filterLogic, TrainingDataset trainingDataset, - String path) { + List convertToFilterEntities(FilterLogic filterLogic, + TrainingDataset 
trainingDataset, String path) { + return convertToFilterEntities(filterLogic, null, trainingDataset, path); + } + + public List convertToFilterEntities(FilterLogic filterLogic, + FeatureView featureView, String path) { + return convertToFilterEntities(filterLogic, featureView, null, path); + } + + private List convertToFilterEntities(FilterLogic filterLogic, + FeatureView featureView, TrainingDataset trainingDataset, String path) { List filters = new ArrayList<>(); if (filterLogic == null) { return filters; @@ -425,31 +510,34 @@ List convertToFilterEntities(FilterLogic filterLogic, Tra if (filterLogic.getType().equals(SqlFilterLogic.SINGLE)) { if (filterLogic.getLeftFilter() == null) { filters.add( - makeTrainingDatasetFilter(path, trainingDataset, filterLogic.getRightFilter(), SqlFilterLogic.SINGLE)); + makeTrainingDatasetFilter(path, featureView, trainingDataset, filterLogic.getRightFilter(), + SqlFilterLogic.SINGLE)); } else { filters.add( - makeTrainingDatasetFilter(path, trainingDataset, filterLogic.getLeftFilter(), filterLogic.getType())); + makeTrainingDatasetFilter(path, featureView, trainingDataset, filterLogic.getLeftFilter(), + filterLogic.getType())); } } else { filters.add( - makeTrainingDatasetFilter(path, trainingDataset, null, filterLogic.getType())); + makeTrainingDatasetFilter(path,featureView, trainingDataset, null, filterLogic.getType())); if (filterLogic.getLeftFilter() != null) { filters.add(makeTrainingDatasetFilter( - path + ".L", trainingDataset, filterLogic.getLeftFilter(), SqlFilterLogic.SINGLE)); + path + ".L", featureView, trainingDataset, filterLogic.getLeftFilter(), SqlFilterLogic.SINGLE)); } if (filterLogic.getRightFilter() != null) { filters.add(makeTrainingDatasetFilter( - path + ".R", trainingDataset, filterLogic.getRightFilter(), SqlFilterLogic.SINGLE)); + path + ".R", featureView, trainingDataset, filterLogic.getRightFilter(), SqlFilterLogic.SINGLE)); } - filters.addAll(convertToFilterEntities(filterLogic.getLeftLogic(), 
trainingDataset, path + ".L")); - filters.addAll(convertToFilterEntities(filterLogic.getRightLogic(), trainingDataset, path + ".R")); + filters.addAll(convertToFilterEntities(filterLogic.getLeftLogic(), featureView, trainingDataset, path + ".L")); + filters.addAll(convertToFilterEntities(filterLogic.getRightLogic(), featureView, trainingDataset, path + ".R")); } return filters; } - private TrainingDatasetFilter makeTrainingDatasetFilter(String path, TrainingDataset trainingDataset, - Filter filter, SqlFilterLogic type) { - TrainingDatasetFilter trainingDatasetFilter = new TrainingDatasetFilter(trainingDataset); + private TrainingDatasetFilter makeTrainingDatasetFilter(String path, FeatureView featureView, + TrainingDataset trainingDataset, Filter filter, SqlFilterLogic type) { + TrainingDatasetFilter trainingDatasetFilter = featureView == null ? + new TrainingDatasetFilter(trainingDataset) : new TrainingDatasetFilter(featureView); TrainingDatasetFilterCondition condition = filter == null ? 
null : convertFilter(filter, trainingDatasetFilter); trainingDatasetFilter.setCondition(condition); trainingDatasetFilter.setPath(path); @@ -582,6 +670,15 @@ public TrainingDataset getTrainingDatasetById(Featurestore featurestore, Integer Level.FINE, "trainingDatasetId: " + id)); } + public TrainingDataset getTrainingDatasetByFeatureViewAndVersion(FeatureView featureView, Integer version) + throws FeaturestoreException { + return trainingDatasetFacade.findByFeatureViewAndVersion(featureView, version); + } + + public List getTrainingDatasetByFeatureView(FeatureView featureView) { + return trainingDatasetFacade.findByFeatureView(featureView); + } + public List getWithNameAndFeaturestore(Users user, Project project, Featurestore featurestore, String name) throws FeaturestoreException, ServiceException { @@ -612,40 +709,105 @@ public TrainingDatasetDTO getWithNameVersionAndFeaturestore(Users user, Project public String delete(Users user, Project project, Featurestore featurestore, Integer trainingDatasetId) throws FeaturestoreException { - TrainingDataset trainingDataset = trainingDatasetFacade.findByIdAndFeaturestore(trainingDatasetId, featurestore) - .orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NOT_FOUND, - Level.FINE, "training dataset id:" + trainingDatasetId)); + .orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NOT_FOUND, + Level.FINE, "training dataset id:" + trainingDatasetId)); + return delete(user, project, featurestore, trainingDataset); + } + + public void delete(Users user, Project project, Featurestore featurestore, FeatureView featureView, + Integer trainingDatasetVersion) throws FeaturestoreException { + TrainingDataset trainingDataset = getTrainingDatasetByFeatureViewAndVersion(featureView, trainingDatasetVersion); + String trainingDatasetName = delete(user, project, featurestore, trainingDataset); + 
activityFacade.persistActivity(ActivityFacade.DELETED_TRAINING_DATASET + trainingDatasetName + + " and version " + trainingDataset.getVersion(), + project, user, ActivityFlag.SERVICE); + } + + public void delete(Users user, Project project, Featurestore featurestore, FeatureView featureView) + throws FeaturestoreException { + List trainingDatasets = getTrainingDatasetByFeatureView(featureView); + + // Delete all only if user has the right to delete all versions of training dataset. + for (TrainingDataset trainingDataset: trainingDatasets) { + featurestoreUtils.verifyUserRole(trainingDataset, featurestore, user, project); + } + for (TrainingDataset trainingDataset: trainingDatasets) { + // Since this method is seldom called, it is acceptable to retrieve the TrainingDataset from the DB twice. + delete(user, project, featurestore, featureView, trainingDataset.getVersion()); + } + } + + public String delete(Users user, Project project, Featurestore featurestore, TrainingDataset trainingDataset) + throws FeaturestoreException { featurestoreUtils.verifyUserRole(trainingDataset, featurestore, user, project); statisticsController.deleteStatistics(project, user, trainingDataset); + trainingDatasetFacade.removeTrainingDataset(trainingDataset); + deleteHopsfsTrainingData(user, project, featurestore, trainingDataset, false, false); + return trainingDataset.getName(); + } + + public void deleteDataOnly(Users user, Project project, Featurestore featurestore, FeatureView featureView, + Integer trainingDatasetVersion) throws FeaturestoreException { + TrainingDataset trainingDataset = getTrainingDatasetByFeatureViewAndVersion(featureView, trainingDatasetVersion); + deleteHopsfsTrainingData(user, project, featurestore, trainingDataset, true, true); + activityFacade.persistActivity(ActivityFacade.DELETED_TRAINING_DATASET_DATA_ONLY + trainingDataset.getName() + + " and version " + trainingDataset.getVersion(), + project, user, ActivityFlag.SERVICE); + } + + public void deleteDataOnly(Users user, 
Project project, Featurestore featurestore, FeatureView featureView) + throws FeaturestoreException { + List trainingDatasets = getTrainingDatasetByFeatureView(featureView); + + // Delete all only if user has the right to delete all versions of training dataset. + for (TrainingDataset trainingDataset: trainingDatasets) { + featurestoreUtils.verifyUserRole(trainingDataset, featurestore, user, project); + } + + for (TrainingDataset trainingDataset: trainingDatasets) { + deleteDataOnly(user, project, featurestore, featureView, trainingDataset.getVersion()); + } + } + + public void deleteHopsfsTrainingData(Users user, Project project, Featurestore featurestore, + TrainingDataset trainingDataset, Boolean verifyRole, Boolean keepMetadata) + throws FeaturestoreException { + if (verifyRole) { + featurestoreUtils.verifyUserRole(trainingDataset, featurestore, user, project); + } String dsPath = getTrainingDatasetInodePath(trainingDataset); String username = hdfsUsersBean.getHdfsUserName(project, user); - // we rely on the foreign keys to cascade from inode -> external/hopsfs td -> trainig dataset DistributedFileSystemOps udfso = dfs.getDfsOps(username); try { // TODO(Fabio): if Data owner *In project* do operation as superuser - udfso.rm(dsPath, true); + if (keepMetadata) { + // Since keywords and tags are stored as attribute of the folder, delete contents in the folder only. 
+ FileStatus[] fileStatuses = udfso.listStatus(new Path(dsPath)); + for (FileStatus fileStatus : fileStatuses) { + udfso.rm(fileStatus.getPath(), true); + } + } else { + udfso.rm(dsPath, true); + } } catch (IOException e) { - } finally { if (udfso != null) { dfs.closeDfsClient(udfso); } } - - return trainingDataset.getName(); } public String getTrainingDatasetInodePath(TrainingDataset trainingDataset) { - if (trainingDataset.getTrainingDatasetType() == TrainingDatasetType.HOPSFS_TRAINING_DATASET) { - return inodeController.getPath(trainingDataset.getHopsfsTrainingDataset().getInode()); - } else { + if (trainingDataset.getTrainingDatasetType() == TrainingDatasetType.EXTERNAL_TRAINING_DATASET) { return inodeController.getPath(trainingDataset.getExternalTrainingDataset().getInode()); + } else { + return inodeController.getPath(trainingDataset.getHopsfsTrainingDataset().getInode()); } } @@ -709,6 +871,10 @@ public TrainingDatasetDTO updateTrainingDatasetStatsConfig(Users user, Project p .persistStatisticColumns(trainingDataset, trainingDatasetDTO.getStatisticsConfig().getColumns()); // get feature group again with persisted columns - this trip to the database can be saved trainingDataset = getTrainingDatasetById(featurestore, trainingDatasetDTO.getId()); + + activityFacade.persistActivity(ActivityFacade.EDITED_TRAINING_DATASET + trainingDatasetDTO.getName(), + project, user, ActivityFlag.SERVICE); + return convertTrainingDatasetToDTO(user, project, trainingDataset); } @@ -748,22 +914,38 @@ public Query getQuery(TrainingDataset trainingDataset, boolean withLabel, Projec if (!trainingDataset.isQuery()) { throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NO_QUERY, - Level.FINE, "Inference vector is only available for datasets generated by queries"); + Level.FINE, "Inference vector is only available for datasets generated by queries"); } List joins = getJoinsSorted(trainingDataset); - // Convert all the TrainingDatasetFeatures to 
QueryFeatures - Map fgAliasLookup = getAliasLookupTable(joins); - // These features are for the select part and are from different feature groups // to respect the ordering, all selected features are added to the left most Query instead of splitting them // over the querys for their respective origin feature group List tdFeatures = getFeaturesSorted(trainingDataset, withLabel); + return getQuery(joins, tdFeatures, trainingDataset.getFilters(), project, user, isHiveEngine); + } + + private void setCommitTime(Query query) throws FeaturestoreException { + if (query.getFeaturegroup().getCachedFeaturegroup() != null && + query.getFeaturegroup().getCachedFeaturegroup().getTimeTravelFormat() == TimeTravelFormat.HUDI) { + FeatureGroupCommit endCommit = + featureGroupCommitCommitController.findCommitByDate(query.getFeaturegroup(), null); + query.setLeftFeatureGroupEndTimestamp(endCommit.getCommittedOn()); + query.setLeftFeatureGroupEndCommitId(endCommit.getFeatureGroupCommitPK().getCommitId()); + } + } + + public Query getQuery(List joins, List tdFeatures, + Collection trainingDatasetFilters, Project project, + Users user, Boolean isHiveEngine) throws FeaturestoreException { + + // Convert all the TrainingDatasetFeatures to QueryFeatures + Map fgAliasLookup = getAliasLookupTable(joins); // Check that all the feature groups still exists, if not throw a reasonable error if (tdFeatures.stream().anyMatch(j -> j.getFeatureGroup() == null)) { - throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_QUERY_FG_DELETED, Level.FINE); + throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.QUERY_FAILED_FG_DELETED, Level.FINE); } // Get available features for all involved feature groups once, and save in map fgId -> availableFeatures @@ -772,8 +954,9 @@ public Query getQuery(TrainingDataset trainingDataset, boolean withLabel, Projec if (!availableFeaturesLookup.containsKey(join.getFeatureGroup().getId())) { List availableFeatures = 
featuregroupController.getFeatures(join.getFeatureGroup(), project, user) .stream() - .map(f -> new Feature(f.getName(), fgAliasLookup.get(join.getId()), f.getType(), f.getDefaultValue(), - f.getPrimary(), join.getFeatureGroup(), join.getPrefix())) + .map(f -> new Feature(f.getName(), + fgAliasLookup.get(join.getId()), f.getType(), f.getPrimary(), f.getDefaultValue(), join.getPrefix(), + join.getFeatureGroup())) .collect(Collectors.toList()); availableFeaturesLookup.put(join.getFeatureGroup().getId(), availableFeatures); } @@ -820,8 +1003,11 @@ public Query getQuery(TrainingDataset trainingDataset, boolean withLabel, Projec queryJoins.add(getQueryJoin(query, joins.get(i), fgAliasLookup, fsLookup, availableFeaturesLookup, isHiveEngine)); } query.setJoins(queryJoins); - FilterLogic filterLogic = convertToFilterLogic(trainingDataset.getFilters(), featureLookup, "L"); + FilterLogic filterLogic = convertToFilterLogic(trainingDatasetFilters, featureLookup, "L"); query.setFilter(filterLogic); + + setCommitTime(query); + return query; } @@ -842,7 +1028,7 @@ public Query getQuery(TrainingDataset trainingDataset, boolean withLabel, Projec FilterLogic convertToFilterLogic(Collection trainingDatasetFilters, Map features, String headPath) throws FeaturestoreException { - if (trainingDatasetFilters.size() == 0) { + if (trainingDatasetFilters == null || trainingDatasetFilters.size() == 0) { return null; } FilterLogic filterLogic = new FilterLogic(); @@ -957,7 +1143,11 @@ public List getFeaturesSorted(TrainingDataset trainingDa } public List getJoinsSorted(TrainingDataset trainingDataset) { - return trainingDataset.getJoins().stream() + return getJoinsSorted(trainingDataset.getJoins()); + } + + public List getJoinsSorted(Collection joins) { + return joins.stream() .sorted(Comparator.comparing(TrainingDatasetJoin::getIndex)) .collect(Collectors.toList()); } @@ -979,6 +1169,8 @@ public Join getQueryJoin(Query leftQuery, TrainingDatasetJoin rightTdJoin, Map leftOn = 
rightTdJoin.getConditions().stream() .map(c -> new Feature(c.getLeftFeature())).collect(Collectors.toList()); diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetDTO.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetDTO.java index 41a742a680..f9ca7326e7 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetDTO.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetDTO.java @@ -27,7 +27,9 @@ import javax.xml.bind.annotation.XmlElement; import javax.xml.bind.annotation.XmlRootElement; +import javax.xml.bind.annotation.adapters.XmlJavaTypeAdapter; import java.util.ArrayList; +import java.util.Date; import java.util.List; import java.util.stream.Collectors; @@ -36,7 +38,7 @@ * using jaxb. */ @XmlRootElement -public class TrainingDatasetDTO extends FeaturestoreEntityDTO { +public class TrainingDatasetDTO extends FeaturestoreEntityDTO { private String dataFormat; private Boolean coalesce; @@ -58,8 +60,10 @@ public class TrainingDatasetDTO extends FeaturestoreEntityDTO { private List features; private Double sampleRatio; - private String eventStartTime; - private String eventEndTime; + @XmlJavaTypeAdapter(DateAdapter.class) + private Date eventStartTime; + @XmlJavaTypeAdapter(DateAdapter.class) + private Date eventEndTime; public TrainingDatasetDTO() { } @@ -78,6 +82,9 @@ public TrainingDatasetDTO(TrainingDataset trainingDataset) { this.seed = trainingDataset.getSeed(); this.fromQuery = trainingDataset.isQuery(); this.trainSplit = trainingDataset.getTrainSplit(); + this.eventStartTime = trainingDataset.getStartTime(); + this.eventEndTime = trainingDataset.getEndTime(); + this.sampleRatio = trainingDataset.getSampleRatio(); } @XmlElement @@ -174,6 +181,30 @@ public void setTrainSplit(String trainSplit) { this.trainSplit = trainSplit; } + 
public Double getSampleRatio() { + return sampleRatio; + } + + public void setSampleRatio(Double sampleRatio) { + this.sampleRatio = sampleRatio; + } + + public Date getEventStartTime() { + return eventStartTime; + } + + public void setEventStartTime(Date eventStartTime) { + this.eventStartTime = eventStartTime; + } + + public Date getEventEndTime() { + return eventEndTime; + } + + public void setEventEndTime(Date eventEndTime) { + this.eventEndTime = eventEndTime; + } + @Override public String toString() { return "TrainingDatasetDTO{" + diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetDTOBuilder.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetDTOBuilder.java new file mode 100644 index 0000000000..01781659db --- /dev/null +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetDTOBuilder.java @@ -0,0 +1,65 @@ +/* + * This file is part of Hopsworks + * Copyright (C) 2022, Logical Clocks AB. All rights reserved + * + * Hopsworks is free software: you can redistribute it and/or modify it under the terms of + * the GNU Affero General Public License as published by the Free Software Foundation, + * either version 3 of the License, or (at your option) any later version. + * + * Hopsworks is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License along with this program. + * If not, see . 
+ */ + +package io.hops.hopsworks.common.featurestore.trainingdatasets; + +import com.google.common.collect.Lists; +import io.hops.hopsworks.exceptions.FeaturestoreException; +import io.hops.hopsworks.exceptions.ServiceException; +import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset; +import io.hops.hopsworks.persistence.entity.project.Project; +import io.hops.hopsworks.persistence.entity.user.Users; + +import javax.ejb.EJB; +import javax.ejb.Stateless; +import javax.ejb.TransactionAttribute; +import javax.ejb.TransactionAttributeType; +import javax.ws.rs.core.UriInfo; +import java.util.List; + +@Stateless +@TransactionAttribute(TransactionAttributeType.NEVER) +public class TrainingDatasetDTOBuilder { + + @EJB + private TrainingDatasetController trainingDatasetController; + + public TrainingDatasetDTO build(Users user, Project project, TrainingDataset trainingDataset, UriInfo uriInfo) throws + FeaturestoreException, ServiceException { + TrainingDatasetDTO trainingDatasetDTO = trainingDatasetController.convertTrainingDatasetToDTO(user, project, + trainingDataset, true); + trainingDatasetDTO.setHref(uriInfo.getRequestUri()); + return trainingDatasetDTO; + } + + public TrainingDatasetDTO build(Users user, Project project, List trainingDatasets, + UriInfo uriInfo) throws FeaturestoreException, ServiceException { + TrainingDatasetDTO trainingDatasetDTO = new TrainingDatasetDTO(); + trainingDatasetDTO.setCount((long) trainingDatasets.size()); + trainingDatasetDTO.setHref(uriInfo.getRequestUri()); + trainingDatasetDTO.setItems(Lists.newArrayList()); + for (TrainingDataset trainingDataset: trainingDatasets) { + TrainingDatasetDTO trainingDatasetDTOItem = build(user, project, trainingDataset, uriInfo); + trainingDatasetDTOItem.setHref(uriInfo.getRequestUriBuilder() + .path("version") + .path(trainingDataset.getVersion().toString()) + .build()); + trainingDatasetDTO.getItems().add(trainingDatasetDTOItem); + } + return trainingDatasetDTO; + 
} + +} diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetFacade.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetFacade.java index 925d4d7dcd..9473229931 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetFacade.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetFacade.java @@ -19,8 +19,11 @@ import io.hops.hopsworks.common.dao.AbstractFacade; import io.hops.hopsworks.common.featurestore.trainingdatasets.external.ExternalTrainingDatasetFacade; import io.hops.hopsworks.common.featurestore.trainingdatasets.hopsfs.HopsfsTrainingDatasetFacade; +import io.hops.hopsworks.exceptions.FeaturestoreException; import io.hops.hopsworks.persistence.entity.featurestore.Featurestore; +import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset; +import io.hops.hopsworks.restutils.RESTCodes; import javax.ejb.EJB; import javax.ejb.Stateless; @@ -30,6 +33,7 @@ import javax.persistence.TypedQuery; import java.util.List; import java.util.Optional; +import java.util.logging.Level; import java.util.logging.Logger; /** @@ -98,6 +102,21 @@ public List findByNameAndFeaturestore(String name, Featurestore .setParameter("name", name) .getResultList(); } + + /** + * Retrieves a list of trainingDataset (different versions) given their name and feature store from the database + * + * @param name name of the trainingDataset + * @param featurestore featurestore of the trainingDataset + * @return a list of TrainingDataset entities, one per matching version + */ + public List findByNameAndFeaturestoreExcludeFeatureView(String name, + Featurestore featurestore) { + return em.createNamedQuery("TrainingDataset.findByFeaturestoreAndNameExcludeFeatureView", TrainingDataset.class) + 
.setParameter("featurestore", featurestore) + .setParameter("name", name) + .getResultList(); + } /** * Retrieves a list of trainingDataset (different versions) given their name and feature store from the database @@ -143,8 +162,8 @@ public Optional findByNameVersionAndFeaturestore(String name, I */ @Override public List findAll() { - TypedQuery q = em.createNamedQuery("TrainingDataset.findAll", TrainingDataset.class); - return q.getResultList(); + TypedQuery query = em.createNamedQuery("TrainingDataset.findAll", TrainingDataset.class); + return query.getResultList(); } /** @@ -154,9 +173,9 @@ public List findAll() { * @return */ public List findByFeaturestore(Featurestore featurestore) { - TypedQuery q = em.createNamedQuery("TrainingDataset.findByFeaturestore", TrainingDataset.class) + TypedQuery query = em.createNamedQuery("TrainingDataset.findByFeaturestore", TrainingDataset.class) .setParameter("featurestore", featurestore); - return q.getResultList(); + return query.getResultList(); } public Long countByFeaturestore(Featurestore featurestore) { @@ -165,6 +184,48 @@ public Long countByFeaturestore(Featurestore featurestore) { .getSingleResult(); } + public TrainingDataset findByFeatureViewAndVersion(FeatureView featureView, Integer version) + throws FeaturestoreException { + TypedQuery query = + em.createNamedQuery("TrainingDataset.findByFeatureViewAndVersion", TrainingDataset.class) + .setParameter("featureView", featureView) + .setParameter("version", version); + return query.getResultList().stream() + .findFirst() + .orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NOT_FOUND, + Level.FINE, String.format("FeatureView name: %s, version: %s, td version: %s", featureView.getName(), + featureView.getVersion(), version))); + } + + public Optional findByFeatureViewAndVersionNullable(FeatureView featureView, Integer version) { + TypedQuery query = + em.createNamedQuery("TrainingDataset.findByFeatureViewAndVersion", 
TrainingDataset.class) + .setParameter("featureView", featureView) + .setParameter("version", version); + return query.getResultList() + .stream() + .findFirst(); + } + + /** + * Retrieves a list of trainingDataset (different versions) given a featureView from the database + * ordered by their version number in descending order + * + * @param featureView + * @return list of trainingDataset + */ + public List findByFeatureViewAndVersionOrderedDescVersion(FeatureView featureView) { + return em.createNamedQuery("TrainingDataset.findByFeatureViewOrderedByDescVersion", TrainingDataset.class) + .setParameter("featureView", featureView) + .getResultList(); + } + + public List findByFeatureView(FeatureView featureView) { + TypedQuery query = em.createNamedQuery("TrainingDataset.findByFeatureView", TrainingDataset.class) + .setParameter("featureView", featureView); + return query.getResultList(); + } + /** * Gets the entity manager of the facade * @@ -174,4 +235,17 @@ public Long countByFeaturestore(Featurestore featurestore) { protected EntityManager getEntityManager() { return em; } + + public void removeTrainingDataset(TrainingDataset trainingDataset) { + switch (trainingDataset.getTrainingDatasetType()) { + case HOPSFS_TRAINING_DATASET: + hopsfsTrainingDatasetFacade.remove(trainingDataset.getHopsfsTrainingDataset()); + break; + case EXTERNAL_TRAINING_DATASET: + externalTrainingDatasetFacade.remove(trainingDataset.getExternalTrainingDataset()); + break; + } + + remove(trainingDataset); + } } diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetInputValidation.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetInputValidation.java index da2b5c4ad8..b63f670547 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetInputValidation.java +++ 
b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/TrainingDatasetInputValidation.java @@ -100,10 +100,13 @@ public void validate(TrainingDatasetDTO trainingDatasetDTO, Query query) throws private void validateType(TrainingDatasetType trainingDatasetType) throws FeaturestoreException { if (trainingDatasetType != TrainingDatasetType.HOPSFS_TRAINING_DATASET && - trainingDatasetType != TrainingDatasetType.EXTERNAL_TRAINING_DATASET) { + trainingDatasetType != TrainingDatasetType.EXTERNAL_TRAINING_DATASET && + trainingDatasetType != TrainingDatasetType.IN_MEMORY_TRAINING_DATASET) { throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_TYPE, Level.FINE, ", Recognized Training Dataset types are: " + TrainingDatasetType.HOPSFS_TRAINING_DATASET + ", and: " + - TrainingDatasetType.EXTERNAL_TRAINING_DATASET+ ". The provided training dataset type was not recognized: " + TrainingDatasetType.EXTERNAL_TRAINING_DATASET + ", and: " + + TrainingDatasetType.IN_MEMORY_TRAINING_DATASET + + ". 
The provided training dataset type was not recognized: " + trainingDatasetType); } } diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/hopsfs/HopsfsTrainingDatasetController.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/hopsfs/HopsfsTrainingDatasetController.java index fe3d3554cd..5b1c49a18f 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/hopsfs/HopsfsTrainingDatasetController.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/trainingdatasets/hopsfs/HopsfsTrainingDatasetController.java @@ -67,15 +67,16 @@ public TrainingDatasetDTO convertHopsfsTrainingDatasetToDTO(TrainingDatasetDTO t } HopsfsTrainingDataset hopsfsTrainingDataset = trainingDataset.getHopsfsTrainingDataset(); + if (hopsfsTrainingDataset != null) { + trainingDatasetDTO.setLocation(new Path(DistributedFileSystemOps.HOPSFS_SCHEME, + namenodeService.getAddress() + ":" + namenodeService.getPort(), + inodeController.getPath(hopsfsTrainingDataset.getInode())).toString()); + trainingDatasetDTO.setInodeId(hopsfsTrainingDataset.getInode().getId()); - trainingDatasetDTO.setLocation(new Path(DistributedFileSystemOps.HOPSFS_SCHEME, - namenodeService.getAddress() + ":" + namenodeService.getPort(), - inodeController.getPath(hopsfsTrainingDataset.getInode())).toString()); - trainingDatasetDTO.setInodeId(hopsfsTrainingDataset.getInode().getId()); - - FeaturestoreHopsfsConnectorDTO hopsfsConnectorDTO = - new FeaturestoreHopsfsConnectorDTO(hopsfsTrainingDataset.getFeaturestoreConnector()); - trainingDatasetDTO.setStorageConnector(hopsfsConnectorDTO); + FeaturestoreHopsfsConnectorDTO hopsfsConnectorDTO = + new FeaturestoreHopsfsConnectorDTO(hopsfsTrainingDataset.getFeaturestoreConnector()); + trainingDatasetDTO.setStorageConnector(hopsfsConnectorDTO); + } return trainingDatasetDTO; } } diff --git 
a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/xattr/dto/FeatureViewXAttrDTO.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/xattr/dto/FeatureViewXAttrDTO.java new file mode 100644 index 0000000000..2b961fbd92 --- /dev/null +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/xattr/dto/FeatureViewXAttrDTO.java @@ -0,0 +1,96 @@ +/* + * This file is part of Hopsworks + * Copyright (C) 2022, Logical Clocks AB. All rights reserved + * + * Hopsworks is free software: you can redistribute it and/or modify it under the terms of + * the GNU Affero General Public License as published by the Free Software Foundation, + * either version 3 of the License, or (at your option) any later version. + * + * Hopsworks is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License along with this program. + * If not, see . 
+ */ +package io.hops.hopsworks.common.featurestore.xattr.dto; + +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlRootElement; +import java.util.Date; +import java.util.LinkedList; +import java.util.List; + +/** + * document attached as an xattr to a feature view directory + */ +@XmlRootElement +public class FeatureViewXAttrDTO { + @XmlElement(nillable = false, name = FeaturestoreXAttrsConstants.FEATURESTORE_ID) + private Integer featurestoreId; + @XmlElement(nillable = true, name = FeaturestoreXAttrsConstants.DESCRIPTION) + private String description; + @XmlElement(nillable = true, name = FeaturestoreXAttrsConstants.CREATE_DATE) + private Long createDate; + @XmlElement(nillable = true, name = FeaturestoreXAttrsConstants.CREATOR) + private String creator; + @XmlElement(nillable = false, name = FeaturestoreXAttrsConstants.FV_FEATURES) + private List features = new LinkedList<>(); + + public FeatureViewXAttrDTO() { + } + + public FeatureViewXAttrDTO(Integer featurestoreId, String description, + Date createDate, String creator) { + this(featurestoreId, description, createDate, creator, new LinkedList<>()); + } + + public FeatureViewXAttrDTO(Integer featurestoreId, String description, + Date createDate, String creator, List features) { + this.featurestoreId = featurestoreId; + this.description = description; + this.createDate = createDate.getTime(); + this.creator = creator; + this.features = features; + } + + public Integer getFeaturestoreId() { + return featurestoreId; + } + + public void setFeaturestoreId(Integer featurestoreId) { + this.featurestoreId = featurestoreId; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public Long getCreateDate() { + return createDate; + } + + public void setCreateDate(Long createDate) { + this.createDate = createDate; + } + + public String getCreator() { + return creator; + } + + public 
void setCreator(String creator) { + this.creator = creator; + } + + public List getFeatures() { + return features; + } + + public void setFeatures(List features) { + this.features = features; + } +} diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/xattr/dto/FeaturestoreXAttrsConstants.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/xattr/dto/FeaturestoreXAttrsConstants.java index 62dab99cbd..bf9980d5d7 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/xattr/dto/FeaturestoreXAttrsConstants.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/featurestore/xattr/dto/FeaturestoreXAttrsConstants.java @@ -28,6 +28,7 @@ public class FeaturestoreXAttrsConstants { public static final String VERSION = "version"; public static final String FG_FEATURES = "fg_features"; public static final String TD_FEATURES = "td_features"; + public static final String FV_FEATURES = "fv_features"; public static final String DESCRIPTION = "description"; public static final String CREATE_DATE = "create_date"; public static final String CREATOR = "creator"; diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/project/ProjectController.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/project/ProjectController.java index 7b91e03f7e..032c45bf5e 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/project/ProjectController.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/project/ProjectController.java @@ -969,7 +969,7 @@ private void addServiceFeaturestore(Project project, Users user, Dataset trainingDatasets = datasetController.getByProjectAndDsName(project, null, project.getName() + "_" + Settings.ServiceDataset.TRAININGDATASETS.getName()); Featurestore featurestore = featurestoreController.createProjectFeatureStore(project, user, featurestoreName, - trainingDatasets); + trainingDatasets); //Create Hopsworks Dataset of the HiveDb 
hiveController.createDatasetDb(project, user, dfso, featurestoreName, DatasetType.FEATURESTORE, featurestore, datasetProvCore); diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/provenance/core/HopsFSProvenanceController.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/provenance/core/HopsFSProvenanceController.java index 03e70818d9..2ac5c144e8 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/provenance/core/HopsFSProvenanceController.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/provenance/core/HopsFSProvenanceController.java @@ -22,6 +22,7 @@ import io.hops.hopsworks.common.featurestore.featuregroup.ondemand.OnDemandFeaturegroupDTO; import io.hops.hopsworks.common.featurestore.featuregroup.stream.StreamFeatureGroupDTO; import io.hops.hopsworks.common.featurestore.trainingdatasets.TrainingDatasetDTO; +import io.hops.hopsworks.common.featurestore.xattr.dto.FeatureViewXAttrDTO; import io.hops.hopsworks.common.featurestore.xattr.dto.FeaturegroupXAttr; import io.hops.hopsworks.common.featurestore.xattr.dto.FeaturestoreXAttrsConstants; import io.hops.hopsworks.common.featurestore.xattr.dto.TrainingDatasetXAttrDTO; @@ -41,6 +42,8 @@ import io.hops.hopsworks.exceptions.ProvenanceException; import io.hops.hopsworks.persistence.entity.dataset.Dataset; import io.hops.hopsworks.persistence.entity.dataset.DatasetSharedWith; +import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; +import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature; import io.hops.hopsworks.persistence.entity.hdfs.inode.Inode; import io.hops.hopsworks.persistence.entity.project.Project; import io.hops.hopsworks.persistence.entity.user.Users; @@ -282,6 +285,36 @@ public void trainingDatasetAttachXAttr(String path, TrainingDatasetDTO trainingD "hopsfs - set xattr - training dataset - error", "hopsfs - set xattr - training dataset - error", e); } } + + public void 
featureViewAttachXAttr(String path, FeatureView featureView, DistributedFileSystemOps udfso) + throws ProvenanceException { + FeatureViewXAttrDTO fv = new FeatureViewXAttrDTO(featureView.getFeaturestore().getId(), + featureView.getDescription(), + featureView.getCreated(), + featureView.getCreator().getEmail(), + fromFeatureViewQuery(featureView)); + + try { + byte[] xattrVal = converter.marshal(fv).getBytes(); + try{ + xattrCtrl.upsertProvXAttr(udfso, path, FeaturestoreXAttrsConstants.FEATURESTORE, xattrVal); + } catch (MetadataException e) { + if (RESTCodes.MetadataErrorCode.METADATA_MAX_SIZE_EXCEEDED.equals(e.getErrorCode())) { + LOGGER.log(Level.INFO, + "xattr is too large to attach - feature view:{0} will not have features attached", path); + fv = new FeatureViewXAttrDTO(featureView.getFeaturestore().getId(), featureView.getDescription(), + featureView.getCreated(), featureView.getCreator().getEmail()); + xattrVal = converter.marshal(fv).getBytes(); + xattrCtrl.upsertProvXAttr(udfso, path, FeaturestoreXAttrsConstants.FEATURESTORE, xattrVal); + } else { + throw e; + } + } + } catch (GenericException | MetadataException | DatasetException e) { + throw new ProvenanceException(RESTCodes.ProvenanceErrorCode.FS_ERROR, Level.WARNING, + "hopsfs - set xattr - feature view - error", "hopsfs - set xattr - feature view - error", e); + } + } public ProvTypeDTO getMetaStatus(Users user, Project project, Boolean searchable) throws ProvenanceException { if(searchable != null && searchable) { @@ -296,7 +329,6 @@ public ProvTypeDTO getMetaStatus(Users user, Project project, Boolean searchable } } - private List fromTrainingDataset(TrainingDatasetDTO trainingDatasetDTO) { if (trainingDatasetDTO.getFromQuery()) { // training dataset generated from hsfs query @@ -321,6 +353,20 @@ private List fromTrainingDatasetQuery(TrainingD return new ArrayList<>(featuregroups.values()); } + private List fromFeatureViewQuery(FeatureView featureView) { + Map featuregroups = new HashMap<>(); + 
for(TrainingDatasetFeature feature : featureView.getFeatures()) { + FeaturegroupXAttr.SimplifiedDTO featuregroup = featuregroups.get(feature.getFeatureGroup().getId()); + if(featuregroup == null) { + featuregroup = new FeaturegroupXAttr.SimplifiedDTO(feature.getFeatureGroup().getFeaturestore().getId(), + feature.getFeatureGroup().getName(), feature.getFeatureGroup().getVersion()); + featuregroups.put(feature.getFeatureGroup().getId(), featuregroup); + } + featuregroup.addFeature(feature.getName()); + } + return new ArrayList<>(featuregroups.values()); + } + private List fromTrainingDatasetDataframe(TrainingDatasetDTO trainingDatasetDTO) { FeaturegroupXAttr.SimplifiedDTO containerFeatureGroup = new FeaturegroupXAttr.SimplifiedDTO(-1, "", -1); diff --git a/hopsworks-common/src/main/java/io/hops/hopsworks/common/util/HopsworksJAXBContext.java b/hopsworks-common/src/main/java/io/hops/hopsworks/common/util/HopsworksJAXBContext.java index 3b6f7f0d56..d487a436d5 100644 --- a/hopsworks-common/src/main/java/io/hops/hopsworks/common/util/HopsworksJAXBContext.java +++ b/hopsworks-common/src/main/java/io/hops/hopsworks/common/util/HopsworksJAXBContext.java @@ -15,6 +15,7 @@ */ package io.hops.hopsworks.common.util; +import io.hops.hopsworks.common.featurestore.xattr.dto.FeatureViewXAttrDTO; import io.hops.hopsworks.common.provenance.core.dto.ProvCoreDTO; import io.hops.hopsworks.common.featurestore.xattr.dto.FeaturegroupXAttr; import io.hops.hopsworks.common.featurestore.xattr.dto.TrainingDatasetXAttrDTO; @@ -63,7 +64,8 @@ public void init() { FeaturegroupXAttr.FullDTO.class, FeaturegroupXAttr.SimplifiedDTO.class, FeaturegroupXAttr.SimpleFeatureDTO.class, - TrainingDatasetXAttrDTO.class + TrainingDatasetXAttrDTO.class, + FeatureViewXAttrDTO.class }, properties); } catch (JAXBException e) { e.printStackTrace(); diff --git a/hopsworks-common/src/test/io/hops/hopsworks/common/featurestore/query/TestQueryController.java 
b/hopsworks-common/src/test/io/hops/hopsworks/common/featurestore/query/TestQueryController.java index 374f60069d..7794ce77f2 100644 --- a/hopsworks-common/src/test/io/hops/hopsworks/common/featurestore/query/TestQueryController.java +++ b/hopsworks-common/src/test/io/hops/hopsworks/common/featurestore/query/TestQueryController.java @@ -16,15 +16,18 @@ package io.hops.hopsworks.common.featurestore.query; +import com.google.common.collect.Lists; import io.hops.hopsworks.common.featurestore.FeaturestoreFacade; import io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO; import io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController; +import io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO; import io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupFacade; import io.hops.hopsworks.common.featurestore.featuregroup.cached.CachedFeaturegroupController; import io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController; import io.hops.hopsworks.common.featurestore.query.filter.FilterController; import io.hops.hopsworks.common.featurestore.query.join.Join; import io.hops.hopsworks.common.featurestore.query.join.JoinController; +import io.hops.hopsworks.common.featurestore.query.join.JoinDTO; import io.hops.hopsworks.exceptions.FeaturestoreException; import io.hops.hopsworks.persistence.entity.featurestore.Featurestore; import io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup; @@ -278,5 +281,45 @@ public void testRemoveDuplicateColumnsPitEnabled() throws Exception { Assert.assertEquals("ft2", rightQuery.getFeatures().get(1).getName()); } + @Test + public void testCheckNestedJoin_nestedJoinNotExist() throws Exception { + FeaturegroupDTO fg1 = new FeaturegroupDTO(); + fg1.setId(1); + FeaturegroupDTO fg2 = new FeaturegroupDTO(); + fg2.setId(2); + + List requestedFeatures = new ArrayList<>(); + requestedFeatures.add(new FeatureGroupFeatureDTO("*")); + + QueryDTO rightQueryDTO 
= new QueryDTO(fg2, requestedFeatures); + JoinDTO joinDTO = new JoinDTO(rightQueryDTO, null, null); + + QueryDTO queryDTO = new QueryDTO(fg1, requestedFeatures, Arrays.asList(joinDTO)); + target.checkNestedJoin(queryDTO); + } + + @Test(expected = FeaturestoreException.class) + public void testCheckNestedJoin_nestedJoinExist() throws Exception { + // fg1 join (fg2 join fg3) + FeaturegroupDTO fg1 = new FeaturegroupDTO(); + fg1.setId(1); + FeaturegroupDTO fg2 = new FeaturegroupDTO(); + fg2.setId(2); + FeaturegroupDTO fg3 = new FeaturegroupDTO(); + fg3.setId(3); + List requestedFeatures = new ArrayList<>(); + requestedFeatures.add(new FeatureGroupFeatureDTO("*")); + + QueryDTO queryDTO2 = new QueryDTO(fg2, requestedFeatures); + JoinDTO joinDTO2 = new JoinDTO(queryDTO2, null, null); + + QueryDTO queryDTO3 = new QueryDTO(fg3, requestedFeatures); + queryDTO3.setJoins(Lists.newArrayList(joinDTO2)); + JoinDTO joinDTO3 = new JoinDTO(queryDTO3, null, null); + + + QueryDTO queryDTO = new QueryDTO(fg1, requestedFeatures, Arrays.asList(joinDTO3)); + target.checkNestedJoin(queryDTO); + } } diff --git a/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/activity/FeaturestoreActivity.java b/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/activity/FeaturestoreActivity.java index e11262f055..7a3fe09ae4 100644 --- a/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/activity/FeaturestoreActivity.java +++ b/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/activity/FeaturestoreActivity.java @@ -118,6 +118,7 @@ public class FeaturestoreActivity implements Serializable { @JoinColumn(name = "training_dataset_id", referencedColumnName = "id") private TrainingDataset trainingDataset; + @JoinColumn(name = "feature_view_id", referencedColumnName = "id") private FeatureView featureView; @@ -227,6 +228,14 @@ public void 
setTrainingDataset(TrainingDataset trainingDataset) { this.trainingDataset = trainingDataset; } + public FeatureView getFeatureView() { + return featureView; + } + + public void setFeatureView(FeatureView featureView) { + this.featureView = featureView; + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/activity/FeaturestoreActivityMeta.java b/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/activity/FeaturestoreActivityMeta.java index ee0dfd0730..0252089580 100644 --- a/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/activity/FeaturestoreActivityMeta.java +++ b/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/activity/FeaturestoreActivityMeta.java @@ -22,7 +22,8 @@ public enum FeaturestoreActivityMeta { FG_ALTERED("Feature group was altered"), ONLINE_ENABLED("Feature group available online"), ONLINE_DISABLED("Feature group not available online"), - TD_CREATED("The training dataset was created"); + TD_CREATED("The training dataset was created"), + FV_CREATED("The feature view was created"); private String value; diff --git a/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/featureview/FeatureView.java b/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/featureview/FeatureView.java index 3ad472c937..f20e1a56cf 100644 --- a/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/featureview/FeatureView.java +++ b/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/featureview/FeatureView.java @@ -20,7 +20,9 @@ import io.hops.hopsworks.persistence.entity.featurestore.activity.FeaturestoreActivity; import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset; import 
io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature; +import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilter; import io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin; +import io.hops.hopsworks.persistence.entity.hdfs.inode.Inode; import io.hops.hopsworks.persistence.entity.user.Users; import javax.persistence.Basic; @@ -31,6 +33,7 @@ import javax.persistence.GenerationType; import javax.persistence.Id; import javax.persistence.JoinColumn; +import javax.persistence.JoinColumns; import javax.persistence.ManyToOne; import javax.persistence.NamedQueries; import javax.persistence.NamedQuery; @@ -87,19 +90,22 @@ public class FeatureView implements Serializable { @Basic(optional = false) @Column(name = "description") private String description; - @Basic - @Column(name = "label") - private String label; @OneToMany(cascade = CascadeType.ALL, mappedBy = "featureView") private Collection features; - //TODO featureview: add TrainingDatasetFilterCondition; + @OneToMany(cascade = CascadeType.ALL, mappedBy = "featureView") + private Collection filters; @OneToMany(cascade = CascadeType.ALL, mappedBy = "featureView") private Collection joins; - // TODO featureview: can be removed? 
@OneToMany(cascade = CascadeType.ALL, mappedBy = "featureView") private Collection activities; @OneToMany(cascade = CascadeType.ALL, mappedBy = "featureView") private Collection trainingDatasets; + @JoinColumns({ + @JoinColumn(name = "inode_pid", referencedColumnName = "parent_id"), + @JoinColumn(name = "inode_name", referencedColumnName = "name"), + @JoinColumn(name = "partition_id", referencedColumnName = "partition_id")}) + @ManyToOne(optional = false) + private Inode inode; public FeatureView() { } @@ -160,14 +166,6 @@ public void setDescription(String description) { this.description = description; } - public String getLabel() { - return label; - } - - public void setLabel(String label) { - this.label = label; - } - public Collection getFeatures() { return features; } @@ -177,6 +175,15 @@ public void setFeatures( this.features = features; } + public Collection getFilters() { + return filters; + } + + public void setFilters( + Collection filters) { + this.filters = filters; + } + public Collection getJoins() { return joins; } @@ -204,6 +211,14 @@ public void setTrainingDatasets( this.trainingDatasets = trainingDatasets; } + public Inode getInode() { + return inode; + } + + public void setInode(Inode inode) { + this.inode = inode; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -216,14 +231,12 @@ public boolean equals(Object o) { return Objects.equals(id, that.id) && Objects.equals(name, that.name) && Objects.equals(featurestore, that.featurestore) && Objects.equals(created, that.created) && Objects.equals(creator, that.creator) && Objects.equals(version, that.version) && - Objects.equals(description, that.description) && Objects.equals(label, that.label) && - Objects.equals(features, that.features) && Objects.equals(joins, that.joins) && - Objects.equals(trainingDatasets, that.trainingDatasets); + Objects.equals(description, that.description) && + Objects.equals(features, that.features) && Objects.equals(joins, that.joins); } @Override 
public int hashCode() { - return Objects.hash(id, name, featurestore, created, creator, version, description, label, features, joins, - trainingDatasets); + return Objects.hash(id, name, featurestore, created, creator, version, description, features, joins); } } diff --git a/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/trainingdataset/TrainingDataset.java b/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/trainingdataset/TrainingDataset.java index a605deb9ba..43dbfe2d76 100644 --- a/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/trainingdataset/TrainingDataset.java +++ b/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/trainingdataset/TrainingDataset.java @@ -58,21 +58,30 @@ @Table(name = "training_dataset", catalog = "hopsworks") @XmlRootElement @NamedQueries({ - @NamedQuery(name = "TrainingDataset.findAll", query = "SELECT td FROM TrainingDataset td"), - @NamedQuery(name = "TrainingDataset.findById", query = "SELECT td FROM TrainingDataset td WHERE td.id = :id"), - @NamedQuery(name = "TrainingDataset.findByFeaturestore", query = "SELECT td FROM TrainingDataset td " + - "WHERE td.featurestore = :featurestore"), - @NamedQuery(name = "TrainingDataset.countByFeaturestore", query = "SELECT count(td.id) FROM TrainingDataset td " + + @NamedQuery(name = "TrainingDataset.findAll", query = "SELECT td FROM TrainingDataset td"), + @NamedQuery(name = "TrainingDataset.findById", query = "SELECT td FROM TrainingDataset td WHERE td.id = :id"), + @NamedQuery(name = "TrainingDataset.findByFeaturestore", query = "SELECT td FROM TrainingDataset td " + "WHERE td.featurestore = :featurestore"), - @NamedQuery(name = "TrainingDataset.findByFeaturestoreAndId", query = "SELECT td FROM TrainingDataset td " + - "WHERE td.featurestore = :featurestore AND td.id = :id"), - @NamedQuery(name = "TrainingDataset.findByFeaturestoreAndNameVersion", 
- query = "SELECT td FROM TrainingDataset td WHERE td.featurestore = :featurestore " + - "AND td.name= :name AND td.version = :version"), - @NamedQuery(name = "TrainingDataset.findByFeaturestoreAndName", query = "SELECT td FROM TrainingDataset td " + - "WHERE td.featurestore = :featurestore AND td.name = :name"), - @NamedQuery(name = "TrainingDataset.findByFeaturestoreAndNameOrderedByDescVersion", query = "SELECT td FROM " + - "TrainingDataset td WHERE td.featurestore = :featurestore AND td.name = :name ORDER BY td.version DESC")}) + @NamedQuery(name = "TrainingDataset.countByFeaturestore", query = "SELECT count(td.id) FROM TrainingDataset td " + + "WHERE td.featurestore = :featurestore"), + @NamedQuery(name = "TrainingDataset.findByFeaturestoreAndId", query = "SELECT td FROM TrainingDataset td " + + "WHERE td.featurestore = :featurestore AND td.id = :id"), + @NamedQuery(name = "TrainingDataset.findByFeaturestoreAndNameVersion", + query = "SELECT td FROM TrainingDataset td WHERE td.featurestore = :featurestore " + + "AND td.name= :name AND td.version = :version"), + @NamedQuery(name = "TrainingDataset.findByFeaturestoreAndName", query = "SELECT td FROM TrainingDataset td " + + "WHERE td.featurestore = :featurestore AND td.name = :name"), + @NamedQuery(name = "TrainingDataset.findByFeaturestoreAndNameExcludeFeatureView", + query = "SELECT td FROM TrainingDataset td WHERE td.featurestore = :featurestore AND td.name = :name AND " + + "td.featureView IS NULL"), + @NamedQuery(name = "TrainingDataset.findByFeaturestoreAndNameOrderedByDescVersion", query = "SELECT td FROM " + + "TrainingDataset td WHERE td.featurestore = :featurestore AND td.name = :name ORDER BY td.version DESC"), + @NamedQuery(name = "TrainingDataset.findByFeatureViewAndVersion", query = "SELECT td FROM TrainingDataset td " + + "WHERE td.featureView = :featureView AND td.version = :version"), + @NamedQuery(name = "TrainingDataset.findByFeatureView", query = "SELECT td FROM TrainingDataset td " + + 
"WHERE td.featureView = :featureView"), + @NamedQuery(name = "TrainingDataset.findByFeatureViewOrderedByDescVersion", query = "SELECT td FROM " + + "TrainingDataset td WHERE td.featureView = :featureView ORDER BY td.version DESC")}) public class TrainingDataset implements Serializable { private static final long serialVersionUID = 1L; @Id @@ -149,6 +158,9 @@ public class TrainingDataset implements Serializable { @JoinColumn(name = "feature_view_id", referencedColumnName = "id") @ManyToOne private FeatureView featureView; + @Basic + @Column(name = "sample_ratio") + private Double sampleRatio; public static long getSerialVersionUID() { return serialVersionUID; @@ -327,6 +339,38 @@ public void setTrainSplit(String trainSplit) { this.trainSplit = trainSplit; } + public Date getStartTime() { + return startTime; + } + + public void setStartTime(Date startTime) { + this.startTime = startTime; + } + + public Date getEndTime() { + return endTime; + } + + public void setEndTime(Date endTime) { + this.endTime = endTime; + } + + public FeatureView getFeatureView() { + return featureView; + } + + public void setFeatureView(FeatureView featureView) { + this.featureView = featureView; + } + + public Double getSampleRatio() { + return sampleRatio; + } + + public void setSampleRatio(Double sampleRatio) { + this.sampleRatio = sampleRatio; + } + @Override public boolean equals(Object o) { if (this == o) return true; @@ -351,6 +395,10 @@ public boolean equals(Object o) { if (!Objects.equals(hopsfsTrainingDataset, that.hopsfsTrainingDataset)) return false; if (!Objects.equals(externalTrainingDataset, that.externalTrainingDataset)) return false; if (!Objects.equals(trainSplit, that.trainSplit)) return false; + if (!Objects.equals(featureView, that.featureView)) return false; + if (!Objects.equals(startTime, that.startTime)) return false; + if (!Objects.equals(endTime, that.endTime)) return false; + if (!Objects.equals(sampleRatio, that.sampleRatio)) return false; return 
Objects.equals(splits, that.splits); } @@ -374,6 +422,10 @@ public int hashCode() { result = 31 * result + (externalTrainingDataset != null ? externalTrainingDataset.hashCode() : 0); result = 31 * result + (splits != null ? splits.hashCode() : 0); result = 31 * result + (trainSplit != null ? trainSplit.hashCode() : 0); + result = 31 * result + (featureView != null ? featureView.hashCode() : 0); + result = 31 * result + (startTime != null ? startTime.hashCode() : 0); + result = 31 * result + (endTime != null ? endTime.hashCode() : 0); + result = 31 * result + (sampleRatio != null ? sampleRatio.hashCode() : 0); return result; } } diff --git a/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/trainingdataset/TrainingDatasetFilter.java b/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/trainingdataset/TrainingDatasetFilter.java index 3cefdc3c7f..8a653946ae 100644 --- a/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/trainingdataset/TrainingDatasetFilter.java +++ b/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/trainingdataset/TrainingDatasetFilter.java @@ -15,6 +15,8 @@ */ package io.hops.hopsworks.persistence.entity.featurestore.trainingdataset; +import io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView; + import javax.persistence.Basic; import javax.persistence.CascadeType; import javax.persistence.Column; @@ -40,6 +42,9 @@ public class TrainingDatasetFilter implements Serializable { @JoinColumn(name = "training_dataset_id", referencedColumnName = "id") @ManyToOne private TrainingDataset trainingDataset; + @JoinColumn(name = "feature_view_id", referencedColumnName = "id") + @ManyToOne + private FeatureView featureView; @OneToOne(cascade = CascadeType.ALL, mappedBy = "trainingDatasetFilter") private TrainingDatasetFilterCondition condition; @Column(name = "type") @@ -56,6 +61,11 @@ public class 
TrainingDatasetFilter implements Serializable { public TrainingDatasetFilter() { } + public TrainingDatasetFilter( + FeatureView featureView) { + this.featureView = featureView; + } + public TrainingDatasetFilter( TrainingDataset trainingDataset) { this.trainingDataset = trainingDataset; @@ -70,6 +80,14 @@ public void setTrainingDataset( this.trainingDataset = trainingDataset; } + public FeatureView getFeatureView() { + return featureView; + } + + public void setFeatureView(FeatureView featureView) { + this.featureView = featureView; + } + public String getPath() { return path; } diff --git a/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/trainingdataset/TrainingDatasetType.java b/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/trainingdataset/TrainingDatasetType.java index c43a4fffd0..63233e0213 100644 --- a/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/trainingdataset/TrainingDatasetType.java +++ b/hopsworks-persistence/src/main/java/io/hops/hopsworks/persistence/entity/featurestore/trainingdataset/TrainingDatasetType.java @@ -26,4 +26,6 @@ public enum TrainingDatasetType { HOPSFS_TRAINING_DATASET, @JsonProperty("EXTERNAL_TRAINING_DATASET") EXTERNAL_TRAINING_DATASET, + @JsonProperty("IN_MEMORY_TRAINING_DATASET") + IN_MEMORY_TRAINING_DATASET } \ No newline at end of file diff --git a/hopsworks-rest-utils/src/main/java/io/hops/hopsworks/restutils/RESTCodes.java b/hopsworks-rest-utils/src/main/java/io/hops/hopsworks/restutils/RESTCodes.java index 04e0cf8595..81cbc37709 100644 --- a/hopsworks-rest-utils/src/main/java/io/hops/hopsworks/restutils/RESTCodes.java +++ b/hopsworks-rest-utils/src/main/java/io/hops/hopsworks/restutils/RESTCodes.java @@ -1523,7 +1523,7 @@ public enum FeaturestoreErrorCode implements RESTErrorCode { TRAINING_DATASET_NO_QUERY(111, "The training dataset wasn't generated from a query", Response.Status.BAD_REQUEST), 
TRAINING_DATASET_NO_SCHEMA(112, "No query or feature schema provided", Response.Status.BAD_REQUEST), - TRAINING_DATASET_QUERY_FG_DELETED(113, "Cannot generate query, some feature groups were deleted", + QUERY_FAILED_FG_DELETED(113, "Cannot generate query, some feature groups were deleted", Response.Status.BAD_REQUEST), ILLEGAL_FEATUREGROUP_UPDATE(114, "Illegal feature group update", Response.Status.BAD_REQUEST), COULD_NOT_ALTER_FEAUTURE_GROUP_METADATA(115, "Failed to alter feature group meta data", @@ -1635,7 +1635,9 @@ public enum FeaturestoreErrorCode implements RESTErrorCode { STREAM_FEATURE_GROUP_ONLINE_DISABLE_ENABLE(184, "Stream feature group cannot be online enabled if it was created as offline only.", Response.Status.BAD_REQUEST), - GCS_FIELD_MISSING(185, "Field missing", Response.Status.BAD_REQUEST); + GCS_FIELD_MISSING(185, "Field missing", Response.Status.BAD_REQUEST), + NESTED_JOIN_NOT_ALLOWED(187, "Nested join is not supported.", Response.Status.BAD_REQUEST), + FEATURE_NOT_FOUND(188, "Could not find feature.", Response.Status.NOT_FOUND); private int code; private String message;