-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Save memory when string terms are not on top #57758
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 4 commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
c7a6dee
Save memory when string terms are not on top
nik9000 175b942
Debug info
nik9000 f8842c0
Big oops
nik9000 c04bf45
Merge branch 'master' into terms_mem
nik9000 e1a6264
Hard check now
nik9000 38364b6
Merge branch 'master' into terms_mem
nik9000 31fac9f
Update after merge
nik9000 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
220 changes: 220 additions & 0 deletions
220
...rc/main/java/org/elasticsearch/search/aggregations/bucket/terms/BytesKeyedBucketOrds.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,220 @@ | ||
| /* | ||
| * Licensed to Elasticsearch under one or more contributor | ||
| * license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright | ||
| * ownership. Elasticsearch licenses this file to you under | ||
| * the Apache License, Version 2.0 (the "License"); you may | ||
| * not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
|
|
||
| package org.elasticsearch.search.aggregations.bucket.terms; | ||
|
|
||
| import org.apache.lucene.util.BytesRef; | ||
| import org.elasticsearch.common.lease.Releasable; | ||
| import org.elasticsearch.common.lease.Releasables; | ||
| import org.elasticsearch.common.util.BigArrays; | ||
| import org.elasticsearch.common.util.BytesRefHash; | ||
|
|
||
| /** | ||
| * Maps {@link BytesRef} bucket keys to bucket ordinals. | ||
| */ | ||
| public abstract class BytesKeyedBucketOrds implements Releasable { | ||
| /** | ||
| * Build a {@link BytesKeyedBucketOrds}. | ||
| * | ||
| * @param bigArrays handed to the constructor of the chosen implementation | ||
| * @param collectsFromSingleBucket {@code true} selects the implementation that | ||
| * only supports a single owning bucket, {@code false} the one that supports many | ||
| * @return the newly created instance | ||
| */ | ||
| public static BytesKeyedBucketOrds build(BigArrays bigArrays, boolean collectsFromSingleBucket) { | ||
| return collectsFromSingleBucket ? new FromSingle(bigArrays) : new FromMany(bigArrays); | ||
| } | ||
|
|
||
| /** | ||
| * Private, so only the implementations nested in this file can subclass. | ||
| */ | ||
| private BytesKeyedBucketOrds() { | ||
| } | ||
|
|
||
| /** | ||
| * Add the {@code owningBucketOrd, value} pair. Return the ord for | ||
| * their bucket if they have yet to be added, or {@code -1-ord} | ||
| * if they were already present. | ||
| * | ||
| * @param owningBucketOrd ordinal of the bucket that owns {@code value} | ||
| * @param value the bucket key | ||
| * @return the ord for a newly added pair, or {@code -1-ord} for a pair that was already present | ||
| */ | ||
| public abstract long add(long owningBucketOrd, BytesRef value); | ||
|
|
||
| /** | ||
| * Count the buckets in {@code owningBucketOrd}. | ||
| * | ||
| * @param owningBucketOrd ordinal of the owning bucket whose buckets are counted | ||
| */ | ||
| public abstract long bucketsInOrd(long owningBucketOrd); | ||
|
|
||
| /** | ||
| * The number of collected buckets. | ||
| * | ||
| * @return the bucket count as reported by the backing data structure | ||
| */ | ||
| public abstract long size(); | ||
|
|
||
| /** | ||
| * Build an iterator for buckets inside {@code owningBucketOrd} in order | ||
| * of increasing ord. | ||
| * <p> | ||
| * When the iterator is first returned it is "unpositioned" and you must call | ||
| * {@link BucketOrdsEnum#next()} to move it to the first value. | ||
| */ | ||
| public abstract BucketOrdsEnum ordsEnum(long owningBucketOrd); | ||
|
|
||
| /** | ||
| * Cursor over the buckets that belong to one particular {@code owningBucketOrd}. | ||
| */ | ||
| public interface BucketOrdsEnum { | ||
| /** | ||
| * Move on to the next value. | ||
| * @return {@code false} when there is no next value, | ||
| * {@code true} when there is one | ||
| */ | ||
| boolean next(); | ||
|
|
||
| /** | ||
| * Ordinal of the value this enum is currently positioned on. | ||
| */ | ||
| long ord(); | ||
|
|
||
| /** | ||
| * Read the value this enum is currently positioned on, using {@code dest} | ||
| * as the destination. | ||
| */ | ||
| void readValue(BytesRef dest); | ||
|
|
||
| /** | ||
| * A {@linkplain BucketOrdsEnum} without any values: {@link #next()} | ||
| * always answers {@code false}. | ||
| */ | ||
| BucketOrdsEnum EMPTY = new BucketOrdsEnum() { | ||
| @Override | ||
| public boolean next() { | ||
| return false; | ||
| } | ||
|
|
||
| @Override | ||
| public long ord() { | ||
| return 0; | ||
| } | ||
|
|
||
| @Override | ||
| public void readValue(BytesRef dest) { | ||
| /* intentionally empty: there is never a current value */ | ||
| } | ||
| }; | ||
| } | ||
|
|
||
| /** | ||
| * Implementation that only works if it is collecting from a single bucket. | ||
| * <p> | ||
| * Keys go straight into one {@link BytesRefHash}; the value returned by | ||
| * that hash is used as the bucket ord without further translation. | ||
| */ | ||
| private static class FromSingle extends BytesKeyedBucketOrds { | ||
| private final BytesRefHash ords; | ||
|
|
||
| private FromSingle(BigArrays bigArrays) { | ||
| ords = new BytesRefHash(1, bigArrays); | ||
| } | ||
|
|
||
| /** | ||
| * Add {@code value} to the hash. {@code owningBucketOrd} must be {@code 0}. | ||
| */ | ||
| @Override | ||
| public long add(long owningBucketOrd, BytesRef value) { | ||
| assert owningBucketOrd == 0; | ||
| return ords.add(value); | ||
| } | ||
|
|
||
| /** | ||
| * With a single owning bucket this is the same as {@link #size()}. | ||
| */ | ||
| @Override | ||
| public long bucketsInOrd(long owningBucketOrd) { | ||
| return ords.size(); | ||
| } | ||
|
|
||
| @Override | ||
| public long size() { | ||
| return ords.size(); | ||
| } | ||
|
|
||
| /** | ||
| * Iterate every ord from {@code 0} up to, but excluding, the size of the hash. | ||
| */ | ||
| @Override | ||
| public BucketOrdsEnum ordsEnum(long owningBucketOrd) { | ||
| return new BucketOrdsEnum() { | ||
| /* | ||
| * Kept as a long: ords are handled as longs everywhere else in | ||
| * this class, and an int cursor would wrap to a negative value | ||
| * once the hash holds more than Integer.MAX_VALUE entries. | ||
| */ | ||
| private long ord = -1; | ||
|
|
||
| @Override | ||
| public boolean next() { | ||
| ord++; | ||
| return ord < ords.size(); | ||
| } | ||
|
|
||
| @Override | ||
| public long ord() { | ||
| return ord; | ||
| } | ||
|
|
||
| @Override | ||
| public void readValue(BytesRef dest) { | ||
| ords.get(ord, dest); | ||
| } | ||
| }; | ||
| } | ||
|
|
||
| @Override | ||
| public void close() { | ||
| ords.close(); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Implementation that works properly when collecting from many buckets. | ||
| * <p> | ||
| * Works in two steps: {@code bytesToLong} turns each distinct key into a | ||
| * {@code long} id, and {@code longToBucketOrds} maps the | ||
| * {@code (owningBucketOrd, id)} pair to the bucket ord. | ||
| */ | ||
| private static class FromMany extends BytesKeyedBucketOrds { | ||
| // TODO we can almost certainly do better here by building something fit for purpose rather than trying to lego together stuff | ||
| private final BytesRefHash bytesToLong; | ||
| private final LongKeyedBucketOrds longToBucketOrds; | ||
|
|
||
| private FromMany(BigArrays bigArrays) { | ||
| bytesToLong = new BytesRefHash(1, bigArrays); | ||
| boolean success = false; | ||
| try { | ||
| longToBucketOrds = LongKeyedBucketOrds.build(bigArrays, false); | ||
| success = true; | ||
| } finally { | ||
| if (success == false) { | ||
| /* | ||
| * Building the second structure failed, so nobody will ever | ||
| * call close() on this object. Release the hash that was | ||
| * already allocated instead of leaking it. | ||
| */ | ||
| bytesToLong.close(); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Resolve {@code value} to its id, then add the {@code (owningBucketOrd, id)} | ||
| * pair to {@code longToBucketOrds} and return whatever that reports. | ||
| */ | ||
| @Override | ||
| public long add(long owningBucketOrd, BytesRef value) { | ||
| long l = bytesToLong.add(value); | ||
| if (l < 0) { | ||
| /* A negative result encodes an id as -1-id; decode it back to the plain id. */ | ||
| l = -1 - l; | ||
| } | ||
| return longToBucketOrds.add(owningBucketOrd, l); | ||
| } | ||
|
|
||
| @Override | ||
| public long bucketsInOrd(long owningBucketOrd) { | ||
| return longToBucketOrds.bucketsInOrd(owningBucketOrd); | ||
| } | ||
|
|
||
| @Override | ||
| public long size() { | ||
| return longToBucketOrds.size(); | ||
| } | ||
|
|
||
| /** | ||
| * Wrap the enum of {@code longToBucketOrds}, translating its current | ||
| * {@code long} value back into bytes through {@code bytesToLong}. | ||
| */ | ||
| @Override | ||
| public BucketOrdsEnum ordsEnum(long owningBucketOrd) { | ||
| LongKeyedBucketOrds.BucketOrdsEnum delegate = longToBucketOrds.ordsEnum(owningBucketOrd); | ||
| return new BucketOrdsEnum() { | ||
| @Override | ||
| public boolean next() { | ||
| return delegate.next(); | ||
| } | ||
|
|
||
| @Override | ||
| public long ord() { | ||
| return delegate.ord(); | ||
| } | ||
|
|
||
| @Override | ||
| public void readValue(BytesRef dest) { | ||
| bytesToLong.get(delegate.value(), dest); | ||
| } | ||
| }; | ||
| } | ||
|
|
||
| @Override | ||
| public void close() { | ||
| Releasables.close(bytesToLong, longToBucketOrds); | ||
| } | ||
| } | ||
| } | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, that reminds me, I should put a private default constructor on
ValuesSourceConfig, thanks.