diff --git a/cloud/filestore/apps/client/lib/command.cpp b/cloud/filestore/apps/client/lib/command.cpp index a11193d690..8f9f58e71b 100644 --- a/cloud/filestore/apps/client/lib/command.cpp +++ b/cloud/filestore/apps/client/lib/command.cpp @@ -494,7 +494,8 @@ TVector TFileStoreCommand::ResolvePath( NProto::TListNodesResponse TFileStoreCommand::ListAll( ISession& session, const TString& fsId, - ui64 parentId) + ui64 parentId, + bool disableMultiTabletForwarding) { NProto::TListNodesResponse fullResult; TString cookie; @@ -502,7 +503,8 @@ NProto::TListNodesResponse TFileStoreCommand::ListAll( auto request = CreateRequest(); request->SetFileSystemId(fsId); request->SetNodeId(parentId); - request->MutableHeaders()->SetDisableMultiTabletForwarding(true); + request->MutableHeaders()->SetDisableMultiTabletForwarding( + disableMultiTabletForwarding); request->SetCookie(cookie); auto response = WaitFor(session.ListNodes( diff --git a/cloud/filestore/apps/client/lib/command.h b/cloud/filestore/apps/client/lib/command.h index 9af3b3c672..2a7e26a292 100644 --- a/cloud/filestore/apps/client/lib/command.h +++ b/cloud/filestore/apps/client/lib/command.h @@ -196,7 +196,8 @@ class TFileStoreCommand NProto::TListNodesResponse ListAll( ISession& session, const TString& fsId, - ui64 parentId); + ui64 parentId, + bool disableMultiTabletForwarding); class TSessionGuard final { diff --git a/cloud/filestore/apps/client/lib/diff.cpp b/cloud/filestore/apps/client/lib/diff.cpp index 69abf4f5b0..1f8c63934f 100644 --- a/cloud/filestore/apps/client/lib/diff.cpp +++ b/cloud/filestore/apps/client/lib/diff.cpp @@ -51,7 +51,7 @@ class TDiffCommand final ui64 parentId, TMap& result) { - auto response = ListAll(session, fsId, parentId); + auto response = ListAll(session, fsId, parentId, false); for (ui32 i = 0; i < response.NodesSize(); ++i) { const auto& node = response.GetNodes(i); diff --git a/cloud/filestore/apps/client/lib/find.cpp b/cloud/filestore/apps/client/lib/find.cpp index a9bf742139..d6a3badd8c 100644 --- a/cloud/filestore/apps/client/lib/find.cpp +++ b/cloud/filestore/apps/client/lib/find.cpp @@ -47,7 +47,7 @@ class TFindCommand final ui32 depth) { --depth; - auto response = ListAll(session, fsId, parentId); + auto response = ListAll(session, fsId, parentId, false); // TODO: async diff --git a/cloud/filestore/apps/client/lib/find_garbage.cpp b/cloud/filestore/apps/client/lib/find_garbage.cpp index 1850a932d6..32c39ffcba 100644 --- a/cloud/filestore/apps/client/lib/find_garbage.cpp +++ b/cloud/filestore/apps/client/lib/find_garbage.cpp @@ -48,7 +48,7 @@ class TFindGarbageCommand final TVector* nodes) { // TODO: async listing - auto response = ListAll(session, fsId, parentId); + auto response = ListAll(session, fsId, parentId, true); for (ui32 i = 0; i < response.NodesSize(); ++i) { const auto& node = response.GetNodes(i); diff --git a/cloud/filestore/libs/storage/service/service_actor_createfs.cpp b/cloud/filestore/libs/storage/service/service_actor_createfs.cpp index 4688b91a95..5c953c2f30 100644 --- a/cloud/filestore/libs/storage/service/service_actor_createfs.cpp +++ b/cloud/filestore/libs/storage/service/service_actor_createfs.cpp @@ -207,6 +207,11 @@ void TCreateFileStoreActor::ConfigureShards(const TActorContext& ctx) request->Record.SetFileSystemId( FileStoreConfig.ShardConfigs[i].GetFileSystemId()); request->Record.SetShardNo(i + 1); + if (StorageConfig->GetDirectoryCreationInShardsEnabled()) { + for (const auto& shard: FileStoreConfig.ShardConfigs) { + request->Record.AddShardFileSystemIds(shard.GetFileSystemId()); + } + } LOG_INFO( ctx, diff --git a/cloud/filestore/libs/storage/service/service_actor_createhandle.cpp b/cloud/filestore/libs/storage/service/service_actor_createhandle.cpp index c70b8ab077..b3b03fff3b 100644 --- a/cloud/filestore/libs/storage/service/service_actor_createhandle.cpp +++ b/cloud/filestore/libs/storage/service/service_actor_createhandle.cpp @@ -124,6 +124,7 @@ void TCreateHandleActor::CreateHandleInShard(const TActorContext& ctx) request->Record.SetNodeId(RootNodeId); request->Record.SetName(LeaderResponse.GetShardNodeName()); request->Record.ClearShardFileSystemId(); + request->Record.MutableHeaders()->SetBehaveAsDirectoryTablet(false); // E_EXCLUSIVE flag should be unset in order not to get EEXIST from the // shard const auto exclusiveFlag = @@ -168,6 +169,19 @@ void TCreateHandleActor::HandleCreateHandleResponse( LeaderResponded = true; LeaderResponse = std::move(msg->Record); + if (LeaderResponse.GetHandle()) { + LOG_DEBUG( + ctx, + TFileStoreComponents::SERVICE, + "CreateHandle - child node is managed by leader, node: %lu" + ", handle: %lu", + LeaderResponse.GetNodeAttr().GetId(), + LeaderResponse.GetHandle()); + + ReplyAndDie(ctx, std::move(LeaderResponse)); + return; + } + CreateHandleInShard(ctx); } diff --git a/cloud/filestore/libs/storage/service/service_actor_getnodeattr.cpp b/cloud/filestore/libs/storage/service/service_actor_getnodeattr.cpp index b0b4b91523..04b58ad9a9 100644 --- a/cloud/filestore/libs/storage/service/service_actor_getnodeattr.cpp +++ b/cloud/filestore/libs/storage/service/service_actor_getnodeattr.cpp @@ -251,6 +251,31 @@ void TStorageServiceActor::HandleGetNodeAttr( return NCloud::Reply(ctx, *ev, std::move(response)); } + const NProto::TFileStore& filestore = session->FileStore; + + auto& headers = *msg->Record.MutableHeaders(); + headers.SetBehaveAsDirectoryTablet( + StorageConfig->GetDirectoryCreationInShardsEnabled()); + if (auto shardNo = ExtractShardNo(msg->Record.GetNodeId())) { + // parent directory is managed by a shard + auto [shardId, error] = SelectShard( + ctx, + sessionId, + seqNo, + headers.GetDisableMultiTabletForwarding(), + TEvService::TGetNodeAttrMethod::Name, + msg->CallContext->RequestId, + filestore, + shardNo); + if (HasError(error)) { + auto response = + std::make_unique( + std::move(error)); + return NCloud::Reply(ctx, *ev, std::move(response)); + } + msg->Record.SetFileSystemId(shardId); + } + auto [cookie, inflight] = CreateInFlightRequest( TRequestInfo(ev->Sender, ev->Cookie, msg->CallContext), session->MediaKind, diff --git a/cloud/filestore/libs/storage/service/service_actor_listnodes.cpp b/cloud/filestore/libs/storage/service/service_actor_listnodes.cpp index d4e0bcb089..f432ca2706 100644 --- a/cloud/filestore/libs/storage/service/service_actor_listnodes.cpp +++ b/cloud/filestore/libs/storage/service/service_actor_listnodes.cpp @@ -641,6 +641,31 @@ void TStorageServiceActor::HandleListNodes( return NCloud::Reply(ctx, *ev, std::move(response)); } + const NProto::TFileStore& filestore = session->FileStore; + + auto& headers = *msg->Record.MutableHeaders(); + headers.SetBehaveAsDirectoryTablet( + StorageConfig->GetDirectoryCreationInShardsEnabled()); + if (auto shardNo = ExtractShardNo(msg->Record.GetNodeId())) { + // parent directory is managed by a shard + auto [shardId, error] = SelectShard( + ctx, + sessionId, + seqNo, + headers.GetDisableMultiTabletForwarding(), + TEvService::TListNodesMethod::Name, + msg->CallContext->RequestId, + filestore, + shardNo); + if (HasError(error)) { + auto response = + std::make_unique( + std::move(error)); + return NCloud::Reply(ctx, *ev, std::move(response)); + } + msg->Record.SetFileSystemId(shardId); + } + auto [cookie, inflight] = CreateInFlightRequest( TRequestInfo(ev->Sender, ev->Cookie, msg->CallContext), session->MediaKind, diff --git a/cloud/filestore/libs/storage/service/service_ut_sharding.cpp b/cloud/filestore/libs/storage/service/service_ut_sharding.cpp index c51cec2831..1c1a72143b 100644 --- a/cloud/filestore/libs/storage/service/service_ut_sharding.cpp +++ b/cloud/filestore/libs/storage/service/service_ut_sharding.cpp @@ -4663,6 +4663,135 @@ Y_UNIT_TEST_SUITE(TStorageServiceShardingTest) service.DestroyHandle(headers, fsId, nodeId2, handle2); service.DestroyHandle(headers, fsId, nodeId3, handle3); } + + SERVICE_TEST_SID_SELECT_IN_LEADER_ONLY( + ShouldListNodesAndGetNodeAttrInDirectoryInShard) + { + config.SetMultiTabletForwardingEnabled(true); + config.SetDirectoryCreationInShardsEnabled(true); + TTestEnv env({}, config); + env.CreateSubDomain("nfs"); + + ui32 nodeIdx = env.CreateNode("nfs"); + + const TString fsId = "test"; + const auto shard1Id = fsId + "-f1"; + const auto shard2Id = fsId + "-f2"; + + TServiceClient service(env.GetRuntime(), nodeIdx); + service.CreateFileStore(fsId, 1'000); + service.CreateFileStore(shard1Id, 1'000); + service.CreateFileStore(shard2Id, 1'000); + + ConfigureShards(service, fsId, shard1Id, shard2Id); + + auto headers = service.InitSession(fsId, "client"); + + auto createNodeResponse = service.CreateNode( + headers, + TCreateNodeArgs::Directory(RootNodeId, "dir1"))->Record; + const auto dir1Id = createNodeResponse.GetNode().GetId(); + UNIT_ASSERT_VALUES_EQUAL(1, ExtractShardNo(dir1Id)); + + service.CreateNode( + headers, + TCreateNodeArgs::File(dir1Id, "file1")); + service.CreateNode( + headers, + TCreateNodeArgs::File(dir1Id, "file2")); + service.CreateNode( + headers, + TCreateNodeArgs::File(dir1Id, "file3")); + service.CreateNode( + headers, + TCreateNodeArgs::File(dir1Id, "file4")); + + auto listNodesResponse = service.ListNodes( + headers, + fsId, + dir1Id)->Record; + + UNIT_ASSERT_VALUES_EQUAL(4, listNodesResponse.NamesSize()); + UNIT_ASSERT_VALUES_EQUAL("file1", listNodesResponse.GetNames(0)); + UNIT_ASSERT_VALUES_EQUAL("file2", listNodesResponse.GetNames(1)); + UNIT_ASSERT_VALUES_EQUAL("file3", listNodesResponse.GetNames(2)); + UNIT_ASSERT_VALUES_EQUAL("file4", listNodesResponse.GetNames(3)); + TVector> nodes(4); + for (ui32 i = 0; i < 4; ++i) { + nodes[i] = { + listNodesResponse.GetNodes(i).GetId(), + listNodesResponse.GetNames(i)}; + UNIT_ASSERT_VALUES_UNEQUAL(0, nodes[i].first); + } + + UNIT_ASSERT_VALUES_EQUAL(2, ExtractShardNo(nodes[0].first)); + UNIT_ASSERT_VALUES_EQUAL(1, ExtractShardNo(nodes[1].first)); + UNIT_ASSERT_VALUES_EQUAL(2, ExtractShardNo(nodes[2].first)); + UNIT_ASSERT_VALUES_EQUAL(1, ExtractShardNo(nodes[3].first)); + + auto getAttrResponse = service.GetNodeAttr( + headers, + fsId, + RootNodeId, + "dir1")->Record; + + UNIT_ASSERT_VALUES_EQUAL(dir1Id, getAttrResponse.GetNode().GetId()); + UNIT_ASSERT_VALUES_EQUAL( + static_cast(NProto::E_DIRECTORY_NODE), + getAttrResponse.GetNode().GetType()); + + getAttrResponse = service.GetNodeAttr( + headers, + fsId, + dir1Id, + "file1")->Record; + + UNIT_ASSERT_VALUES_EQUAL( + nodes[0].first, + getAttrResponse.GetNode().GetId()); + UNIT_ASSERT_VALUES_EQUAL( + static_cast(NProto::E_REGULAR_NODE), + getAttrResponse.GetNode().GetType()); + + getAttrResponse = service.GetNodeAttr( + headers, + fsId, + dir1Id, + "file2")->Record; + + UNIT_ASSERT_VALUES_EQUAL( + nodes[1].first, + getAttrResponse.GetNode().GetId()); + UNIT_ASSERT_VALUES_EQUAL( + static_cast(NProto::E_REGULAR_NODE), + getAttrResponse.GetNode().GetType()); + + getAttrResponse = service.GetNodeAttr( + headers, + fsId, + dir1Id, + "file3")->Record; + + UNIT_ASSERT_VALUES_EQUAL( + nodes[2].first, + getAttrResponse.GetNode().GetId()); + UNIT_ASSERT_VALUES_EQUAL( + static_cast(NProto::E_REGULAR_NODE), + getAttrResponse.GetNode().GetType()); + + getAttrResponse = service.GetNodeAttr( + headers, + fsId, + dir1Id, + "file4")->Record; + + UNIT_ASSERT_VALUES_EQUAL( + nodes[3].first, + getAttrResponse.GetNode().GetId()); + UNIT_ASSERT_VALUES_EQUAL( + static_cast(NProto::E_REGULAR_NODE), + getAttrResponse.GetNode().GetType()); + } } } // namespace NCloud::NFileStore::NStorage diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_createhandle.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_createhandle.cpp index 738f9c924a..64d7a24e92 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_createhandle.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_createhandle.cpp @@ -206,6 +206,7 @@ bool TIndexTabletActor::PrepareTx_CreateHandle( auto shardId = args.RequestShardId; if (!BehaveAsShard(args.Request.GetHeaders()) + && !GetFileSystem().GetShardFileSystemIds().empty() && Config->GetShardIdSelectionInLeaderEnabled()) { args.Error = SelectShard(0 /*fileSize*/, &shardId); diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_createnode.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_createnode.cpp index db5ec79fa8..92fe3c0b25 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_createnode.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_createnode.cpp @@ -463,6 +463,7 @@ bool TIndexTabletActor::PrepareTx_CreateNode( if (!BehaveAsShard(args.Request.GetHeaders()) && Config->GetShardIdSelectionInLeaderEnabled() + && !GetFileSystem().GetShardFileSystemIds().empty() && (args.Attrs.GetType() == NProto::E_REGULAR_NODE || Config->GetDirectoryCreationInShardsEnabled())) { diff --git a/cloud/filestore/libs/storage/tablet/tablet_state_data.cpp b/cloud/filestore/libs/storage/tablet/tablet_state_data.cpp index bb4bfae7f8..b5ddb89b54 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_state_data.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_state_data.cpp @@ -1434,7 +1434,16 @@ TReadAheadCacheStats TIndexTabletState::CalculateReadAheadCacheStats() const NProto::TError TIndexTabletState::SelectShard(ui64 fileSize, TString* shardId) { - return Impl->ShardBalancer.SelectShard(fileSize, shardId); + auto e = Impl->ShardBalancer.SelectShard(fileSize, shardId); + if (HasError(e)) { + return e; + } + + if (*shardId == GetFileSystemId()) { + shardId->clear(); + } + + return e; } void TIndexTabletState::UpdateShardStats(const TVector& stats) diff --git a/cloud/filestore/tests/client_sharded_dir/canondata/result.json b/cloud/filestore/tests/client_sharded_dir/canondata/result.json new file mode 100644 index 0000000000..90e8c84f11 --- /dev/null +++ b/cloud/filestore/tests/client_sharded_dir/canondata/result.json @@ -0,0 +1,5 @@ +{ + "test.test_nonsharded_vs_sharded_fs": { + "uri": "file://test.test_nonsharded_vs_sharded_fs/results.txt" + } +} diff --git a/cloud/filestore/tests/client_sharded_dir/canondata/test.test_nonsharded_vs_sharded_fs/results.txt b/cloud/filestore/tests/client_sharded_dir/canondata/test.test_nonsharded_vs_sharded_fs/results.txt new file mode 100644 index 0000000000..0f08e3461e --- /dev/null +++ b/cloud/filestore/tests/client_sharded_dir/canondata/test.test_nonsharded_vs_sharded_fs/results.txt @@ -0,0 +1,25 @@ +[ + { + "Type": 2, + "Links": 1, + "Name": "a0", + "Mode": 511, + "Id": 2 + }, + { + "Type": 2, + "Links": 1, + "Name": "a1", + "Mode": 511, + "Id": 17 + } +][ + { + "ShardFileSystemId": "fs1_s1", + "Name": "a0" + }, + { + "ShardFileSystemId": "fs1_s2", + "Name": "a1" + } +] \ No newline at end of file diff --git a/cloud/filestore/tests/client_sharded_dir/cloud-filestore-tests-client_sharded_dir b/cloud/filestore/tests/client_sharded_dir/cloud-filestore-tests-client_sharded_dir new file mode 120000 index 0000000000..96bcb0da0c --- /dev/null +++ b/cloud/filestore/tests/client_sharded_dir/cloud-filestore-tests-client_sharded_dir @@ -0,0 +1 @@ +/home/astr/.ya/build/symres/a7e618c7f78ab2ed77a865c40d747705/cloud-filestore-tests-client_sharded_dir \ No newline at end of file diff --git a/cloud/filestore/tests/client_sharded_dir/nfs-storage.txt b/cloud/filestore/tests/client_sharded_dir/nfs-storage.txt new file mode 100644 index 0000000000..020ee5ee73 --- /dev/null +++ b/cloud/filestore/tests/client_sharded_dir/nfs-storage.txt @@ -0,0 +1,8 @@ +MultiTabletForwardingEnabled: true +LargeDeletionMarkersEnabled: true +MaxFileBlocks: 536870912 +AutomaticShardCreationEnabled: true +ShardAllocationUnit: 1073741824 +AutomaticallyCreatedShardSize: 1073741824 +ShardIdSelectionInLeaderEnabled: true +DirectoryCreationInShardsEnabled: true diff --git a/cloud/filestore/tests/client_sharded_dir/test.py b/cloud/filestore/tests/client_sharded_dir/test.py new file mode 100644 index 0000000000..32a3bcfda7 --- /dev/null +++ b/cloud/filestore/tests/client_sharded_dir/test.py @@ -0,0 +1,135 @@ +import json +import os + +import yatest.common as common + +from cloud.filestore.tests.python.lib.client import FilestoreCliClient + +BLOCK_SIZE = 4 * 1024 +SHARD_SIZE = 1024 * 1024 * 1024 + + +def __init_test(): + port = os.getenv("NFS_SERVER_PORT") + binary_path = common.binary_path("cloud/filestore/apps/client/filestore-client") + client = FilestoreCliClient(binary_path, port, cwd=common.output_path()) + + results_path = common.output_path() + "/results.txt" + return client, results_path + + +def __process_stat(node): + def d(k): + if k in node: + del node[k] + + d("ATime") + d("MTime") + d("CTime") + d("ShardNodeName") + + return node + + +def __exec_ls(client, *args): + output = str(client.ls(*args, "--json"), 'utf-8') + nodes: list = json.loads(output)['content'] + + for node in nodes: + __process_stat(node) + + return json.dumps(nodes, indent=4).encode('utf-8') + + +def __write_some_data(client, fs_id, path, data): + data_file = os.path.join(common.output_path(), "data.txt") + with open(data_file, "w") as f: + f.write("data for %s" % path) + f.write(":: actual data: %s" % data) + + client.write(fs_id, path, "--data", data_file) + + +class FsItem: + + def __init__(self, path, is_dir, data): + self.path = path + self.is_dir = is_dir + self.data = data + + +def __fill_fs(client, fs_id, items): + for item in items: + if item.is_dir: + client.mkdir(fs_id, item.path) + else: + if item.data is not None: + __write_some_data(client, fs_id, item.path, item.data) + else: + client.touch(fs_id, item.path) + + +def test_nonsharded_vs_sharded_fs(): + client, results_path = __init_test() + client.create( + "fs0", + "test_cloud", + "test_folder", + BLOCK_SIZE, + int(SHARD_SIZE / BLOCK_SIZE) - 1) + client.create( + "fs1", + "test_cloud", + "test_folder", + BLOCK_SIZE, + 3 * int(SHARD_SIZE / BLOCK_SIZE)) + + def _d(path): + return FsItem(path, True, None) + + def _f(path, data=None): + return FsItem(path, False, data) + + items = [ + _d("/a0"), + _f("/a0/f0.txt", "xxx"), + _f("/a0/f1.txt", "xxx2"), + _f("/a0/f2.txt", "xxx3"), + _d("/a0/b0"), + _f("/a0/f3.txt", "xxx4"), + _f("/a0/f4.txt"), + _d("/a0/b0/c0"), + _f("/a0/f5.txt"), + _f("/a0/f6.txt", "yyyy"), + _f("/a0/f7.txt", "yyyy2"), + _f("/a0/f8.txt"), + _d("/a0/b0/c0/d0"), + _f("/a0/b0/c0/d0/f9.txt", "yyyy3"), + _f("/a0/b0/c0/d0/f10.txt", "yyyy4"), + _d("/a1"), + _d("/a1/b1"), + _d("/a1/b2"), + _f("/a1/b2/f11.txt", "zzzzz"), + _f("/a1/b2/f12.txt", "zzzzz2"), + _f("/a1/b2/f13.txt", "zzzzz3"), + _f("/a1/b2/f14.txt", "zzzzz4"), + _d("/a1/b2/c1"), + _f("/a1/b2/f15.txt", "ZZZZZZZZZZZ"), + _d("/a1/b3"), + ] + + __fill_fs(client, "fs0", items) + __fill_fs(client, "fs1", items) + + out = __exec_ls(client, "fs0", "/", "--disable-multitablet-forwarding") + out += __exec_ls(client, "fs1", "/", "--disable-multitablet-forwarding") + out += client.diff("fs0", "fs1") + + client.destroy("fs0") + client.destroy("fs1") + + with open(results_path, "wb") as results_file: + results_file.write(out) + + ret = common.canonical_file(results_path, local=True) + return ret diff --git a/cloud/filestore/tests/client_sharded_dir/ya.make b/cloud/filestore/tests/client_sharded_dir/ya.make new file mode 100644 index 0000000000..d63a6b405e --- /dev/null +++ b/cloud/filestore/tests/client_sharded_dir/ya.make @@ -0,0 +1,26 @@ +PY3TEST() + +INCLUDE(${ARCADIA_ROOT}/cloud/filestore/tests/recipes/medium.inc) + +TEST_SRCS( + test.py +) + +DEPENDS( + cloud/filestore/apps/client +) + +PEERDIR( + cloud/filestore/tests/python/lib +) + +SET( + NFS_STORAGE_CONFIG_PATCH + cloud/filestore/tests/client_sharded_dir/nfs-storage.txt +) + +SET(NFS_FORCE_VERBOSE 1) + +INCLUDE(${ARCADIA_ROOT}/cloud/filestore/tests/recipes/service-kikimr.inc) + +END() diff --git a/cloud/filestore/tests/ya.make b/cloud/filestore/tests/ya.make index 9b83000c4a..ce2e5e813d 100644 --- a/cloud/filestore/tests/ya.make +++ b/cloud/filestore/tests/ya.make @@ -8,6 +8,7 @@ RECURSE_FOR_TESTS( build_arcadia_test client client_sharded + client_sharded_dir config_dispatcher endpoints fio