diff --git a/presto-docs/src/main/sphinx/functions/aggregate.rst b/presto-docs/src/main/sphinx/functions/aggregate.rst index 9517c6088856c..2c393f2bb7bbc 100644 --- a/presto-docs/src/main/sphinx/functions/aggregate.rst +++ b/presto-docs/src/main/sphinx/functions/aggregate.rst @@ -564,6 +564,7 @@ To find the `ROC curve double @@ -688,23 +689,204 @@ where :math:`f(x)` is the partial density function of :math:`x`. Otherwise, if the number of distinct weights is low, especially if the number of samples is low, consider using the version taking - ``(bucket_count, x, weight, "fixed_histogram_jacknife", min, max)``, as jacknife bias correction, + ``(bucket_count, x, weight, 'fixed_histogram_jacknife', min, max)``, as jacknife bias correction, is better than maximum likelihood estimation. However, if the number of distinct weights is high, - consider using the version taking ``(bucket_count, x, weight, "fixed_histogram_mle", min, max)``, + consider using the version taking ``(bucket_count, x, weight, 'fixed_histogram_mle', min, max)``, as this will reduce memory and running time. +Discrete Entropy Functions +------------------------------- + +The following functions approximate the `discrete entropy `_. +That is, for a random variable :math:`x`, they approximate + +.. math :: + + H(x) = - \sum_x P(x) \log_2\left(P(x)\right), + +where :math:`P(x)` is the probability of :math:`x`. + +.. function:: discrete_entropy(x) + + Returns the approximate log-2 discrete entropy from a random variable's sample outcomes. The function internally + creates a map of the (hashed) outcomes of :math:`x` to the number of their occurrences, then calculates + the entropy based on the maximum-likelihood estimate of the counts. + + ``x`` (``boolean``, ``double``, ``int``, ``long``, or ``varchar``) is the samples. + + For example, to find the discrete entropy of ``x``, use + + .. code-block:: none + + SELECT + discrete_entropy(x) + FROM + data + + .. 
note:: + + This is equivalent to ``discrete_entropy(x, 'mle')``. If the number of instances is small, + consider using jacknife correction via ``discrete_entropy(x, 'jacknife')``. + +.. function:: discrete_entropy(x, weight) + + Returns the approximate log-2 discrete entropy from a random variable's weighted sample outcomes. The function internally + creates a map of the (hashed) outcomes of :math:`x` to the total weight of their occurrences, then calculates + the entropy based on the maximum-likelihood estimate of the weights. + + ``x`` (``boolean``, ``double``, ``int``, ``long``, or ``varchar``) is the samples. + + ``weight`` (``double``) is the non-negative weights. + + For example, to find the discrete entropy of ``x`` with weights ``weight``, use + + .. code-block:: none + + SELECT + discrete_entropy(x, weight) + FROM + data + + .. note:: + + This is equivalent to ``discrete_entropy(x, weight, 'mle')``. If the number of instances is small, + consider using jacknife correction via ``discrete_entropy(x, weight, 'jacknife')``. + +.. function:: discrete_entropy(x, method) + + Returns the approximate log-2 discrete entropy from a random variable's sample outcomes. + If ``method`` is ``'mle'``, this is equivalent to ``discrete_entropy(x)``. If ``method`` is ``'jacknife'``, + the function internally + creates a map of the (hashed) outcomes of :math:`x` to the number of their occurrences, + then calculates the entropy based on the jacknife-corrected maximum-likelihood estimate of the counts. + + ``x`` (``boolean``, ``double``, ``int``, ``long``, or ``varchar``) is the samples. + + ``method`` is either ``'mle'`` or ``'jacknife'``. + + For example, to find the discrete entropy of ``x``, use + + .. code-block:: none + + SELECT + discrete_entropy(x, 'jacknife') + FROM + data + + .. note:: + + If the number of instances is large, prefer using ``'mle'`` to ``'jacknife'``, as it is faster. + +.. 
function:: discrete_entropy(x, weight, method) + + Returns the approximate log-2 discrete entropy from a random variable's weighted sample outcomes. + If ``method`` is ``'mle'``, this is equivalent to ``discrete_entropy(x, weight)``. If ``method`` is ``'jacknife'``, + the function internally + creates a map of the (hashed) outcomes of :math:`x` and their weights to the number of their occurrences, + then calculates the entropy based on the jacknife-corrected maximum-likelihood estimate of the counts. + + ``x`` (``boolean``, ``double``, ``int``, ``long``, or ``varchar``) is the samples. + + ``weight`` (``double``) is the non-negative weights. + + ``method`` is either ``'mle'`` or ``'jacknife'``. + + For example, to find the discrete entropy of ``x`` using weights ``weight`` and jacknife estimation, use + + .. code-block:: none + + SELECT + discrete_entropy(x, weight, 'jacknife') + FROM + data + + .. note:: + + If the number of instances is large, prefer using ``'mle'`` to ``'jacknife'``, as it is faster. If the number + of distinct weights is large, ``'jacknife'`` might have high memory usage. + + +Mutual Information for Classification Functions +-------------------------------------------------------------- + + The following functions approximate the binary + normalized `mutual information `_, which is a measure + of the usefulness of a numerical feature for classification. They output a number between 0 (not predictive) + and 1 (completely predictive). See [Krier2006]_ for further details. + + For a discrete random variable :math:`y` and a numerical random variable :math:`x`, they approximate + + .. math :: + + I(x, y) = {h(x) - h(x \;|\; y) \over H(y)}, + + where :math:`H` is `discrete entropy `_ and + :math:`h` is `differential entropy `_. + Thus, they measure by how much the entropy of :math:`y` is reduced by knowing :math:`x`, + normalized by the entropy of :math:`y`. + + .. 
function:: normalized_differential_mutual_information_classification(sample_size, y, x) + + Returns the approximate normalized mutual information between a discrete ``y`` and a continuous ``x`` using + reservoir sampling (see :func:`differential_entropy`). + + The parameter ``sample_size`` determines the maximal number of reservoir samples. + + If :math:`x` has known lower and upper bounds, + prefer the 'fixed_histogram_mle' or 'fixed_histogram_jacknife' methods, as they have better convergence. + + .. function:: normalized_differential_mutual_information_classification(sample_size, y, x, weight, 'reservoir_vasicek') + + Returns the approximate normalized mutual information between a discrete ``y`` and a continuous ``x`` using + reservoir sampling (see :func:`differential_entropy`). + + The parameter ``sample_size`` determines the maximal number of reservoir samples. The parameter ``weight`` is the weight + of the sample, and must be non-negative. + + If :math:`x` has known lower and upper bounds, + prefer the 'fixed_histogram_mle' or 'fixed_histogram_jacknife' methods, as they have better convergence. + + .. function:: normalized_differential_mutual_information_classification(bucket_count, y, x, weight, 'fixed_histogram_mle', min, max) -> double + + Returns the approximate normalized mutual information between a discrete ``y`` and a continuous ``x`` using + the maximum-likelihood approximation of a histogram (see :func:`differential_entropy`). + + The parameter ``bucket_count`` determines the number of histogram buckets. The parameters ``min`` and ``max`` are the + minimal and maximal values, respectively; the function will throw if there is an input outside this range. + The parameter ``weight`` is the weight of the sample, and must be non-negative. + + If :math:`x` doesn't have known lower and upper bounds, prefer one of the two methods based on reservoir sampling. + Otherwise, if the number of samples is low, consider using the 'fixed_histogram_jacknife' version. 
+ + .. function:: normalized_differential_mutual_information_classification(bucket_count, y, x, weight, 'fixed_histogram_jacknife', min, max) -> double + + Returns the approximate normalized mutual information between a discrete ``y`` and a continuous ``x`` using + a jacknife approximation of a histogram (see :func:`differential_entropy`). + + The parameter ``bucket_count`` determines the number of histogram buckets. The parameters ``min`` and ``max`` are the + minimal and maximal values, respectively; the function will throw if there is an input outside this range. + The parameter ``weight`` is the weight of the sample, and must be non-negative. + + If :math:`x` doesn't have known lower and upper bounds, prefer one of the two methods based on reservoir sampling. + Otherwise, if ``weight`` can take on a wide range of distinct values, avoid using this method, as space and time costs + might be very high; instead, use 'fixed_histogram_mle'. + --------------------------- -.. [Alizadeh2010] Alizadeh Noughabi, Hadi & Arghami, N. (2010). "A New Estimator of Entropy". +.. [Alizadeh2010] Alizadeh Noughabi, Hadi & Arghami, N. (2010). 'A New Estimator of Entropy'. .. [Beirlant2001] Beirlant, Dudewicz, Gyorfi, and van der Meulen, - "Nonparametric entropy estimation: an overview", (2001) + 'Nonparametric entropy estimation: an overview', (2001) -.. [BenHaimTomTov2010] Yael Ben-Haim and Elad Tom-Tov, "A streaming parallel decision tree algorithm", +.. [BenHaimTomTov2010] Yael Ben-Haim and Elad Tom-Tov, 'A streaming parallel decision tree algorithm', J. Machine Learning Research 11 (2010), pp. 849--872. -.. [Black2015] Black, Paul E. (26 January 2015). "Reservoir sampling". Dictionary of Algorithms and Data Structures. +.. [Black2015] Black, Paul E. (26 January 2015). 'Reservoir sampling'. Dictionary of Algorithms and Data Structures. -.. 
[Efraimidis2006] Efraimidis, Pavlos S.; Spirakis, Paul G. (2006-03-16). 'Weighted random sampling with a reservoir'. Information Processing Letters. 97 (5): 181–185. + +.. [Krier2006] Krier, C & François, Damien & Wertz, Vincent & Verleysen, Michel. (2006). + Feature scoring by mutual information for classification of mass spectra. 10.1142/9789812774118_0079. + diff --git a/presto-main/hs_err_pid49551.log b/presto-main/hs_err_pid49551.log new file mode 100644 index 0000000000000..4ab77dbbae465 --- /dev/null +++ b/presto-main/hs_err_pid49551.log @@ -0,0 +1,983 @@ +# +# A fatal error has been detected by the Java Runtime Environment: +# +# SIGBUS (0xa) at pc=0x00007fff61473e00, pid=49551, tid=0x000000000002a503 +# +# JRE version: OpenJDK Runtime Environment (8.0_222-b10) (build 1.8.0_222-b10) +# Java VM: OpenJDK 64-Bit Server VM (25.222-b10 mixed mode bsd-amd64 compressed oops) +# Problematic frame: +# C [libsystem_platform.dylib+0x1e00] _platform_memmove$VARIANT$Haswell+0x140 +# +# Failed to write core dump. Core dumps have been disabled. To enable core dumping, try "ulimit -c unlimited" before starting Java again +# +# If you would like to submit a bug report, please visit: +# http://bugreport.java.com/bugreport/crash.jsp +# The crash happened outside the Java Virtual Machine in native code. +# See problematic frame for where to report the bug. 
+# + +--------------- T H R E A D --------------- + +Current thread (0x00007fd978bc8800): JavaThread "pool-112-thread-1" [_thread_in_native, id=173315, stack(0x0000700017074000,0x0000700017174000)] + +siginfo: si_signo: 10 (SIGBUS), si_code: 2 (BUS_ADRERR), si_addr: 0x0000000123e59000 + +Registers: +RAX=0x00007fd97a17da02, RBX=0x0000000123e51714, RCX=0x000000000000001e, RDX=0x000000000000260f +RSP=0x000070001716f830, RBP=0x000070001716f830, RSI=0x0000000123e58fc3, RDI=0x00007fd97a17e320 +R8 =0x0000000000000000, R9 =0x0000000000000b01, R10=0x00007fd97a000000, R11=0x00007fd85632535d +R12=0x0000000081a40000, R13=0x0000000074736572, R14=0x00000000000339e4, R15=0x0000000000002f6d +RIP=0x00007fff61473e00, EFLAGS=0x0000000000010202, ERR=0x0000000000000004 + TRAPNO=0x000000000000000e + +Top of Stack: (sp=0x000070001716f830) +0x000070001716f830: 000070001716f8a0 000000010d59505f +0x000070001716f840: 0000006d7d02f010 000000000000002f +0x000070001716f850: 0000000000006166 00007fd97a725c00 +0x000070001716f860: 0000000000006f63 00007fd973e20130 +0x000070001716f870: 00007fd97c284680 00007fd973e20130 +0x000070001716f880: 00007fd976176200 00000000000003f0 +0x000070001716f890: 00000000abae72b6 00007fd97bebbb20 +0x000070001716f8a0: 000070001716f900 000000010d59552d +0x000070001716f8b0: 000070001716f954 000070001716f953 +0x000070001716f8c0: 0000003400000000 0000003501bc8800 +0x000070001716f8d0: 000070001716f91e 000070001716fd68 +0x000070001716f8e0: 0000000000000035 000070001716f91e +0x000070001716f8f0: 00007fd973e20130 00007fd978bc89e0 +0x000070001716f900: 000070001716fd50 000000010d594238 +0x000070001716f910: 00007fd978bc8800 6f63000100000001 +0x000070001716f920: 6f62656361662f6d 74736572702f6b6f +0x000070001716f930: 72702f6970732f6f 2f65746163696465 +0x000070001716f940: 656c62616c6c754e 6c632e65756c6156 +0x000070001716f950: 00007f0000737361 00000001124363d8 +0x000070001716f960: 0000000112c320b0 00000001124363d8 +0x000070001716f970: 0000000000000008 00000001124363d8 
+0x000070001716f980: 0000000114cb18e0 000070001716fa90 +0x000070001716f990: 00000001124363d8 000070001716fc40 +0x000070001716f9a0: 00007fd978bc8800 000070001716fa90 +0x000070001716f9b0: 00000001124363d8 000070001716fc40 +0x000070001716f9c0: 000070001716fb30 000000010ccfe460 +0x000070001716f9d0: 0000000000000001 00007fd97d02f010 +0x000070001716f9e0: 00007fd978bc89e0 000070001716fc40 +0x000070001716f9f0: 000070001716fa20 000000079b2364e8 +0x000070001716fa00: 0000000740005fd8 000000012e9e2848 +0x000070001716fa10: 000000012e9e2848 00000007c0000ea8 +0x000070001716fa20: 00007000171701b0 000000010cbdff9b + +Instructions: (pc=0x00007fff61473e00) +0x00007fff61473de0: 80 00 00 00 76 5a 48 f7 c6 1f 00 00 00 74 31 c5 +0x00007fff61473df0: fc 29 0f c5 fc 29 57 20 48 83 c7 40 c5 fc 10 0e +0x00007fff61473e00: c5 fc 10 56 20 48 83 c6 40 48 83 ea 40 77 e0 eb +0x00007fff61473e10: 2f 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 + +Register to memory mapping: + +RAX=0x00007fd97a17da02 is an unknown value +RBX=0x0000000123e51714 is an unknown value +RCX=0x000000000000001e is an unknown value +RDX=0x000000000000260f is an unknown value +RSP=0x000070001716f830 is pointing into the stack for thread: 0x00007fd978bc8800 +RBP=0x000070001716f830 is pointing into the stack for thread: 0x00007fd978bc8800 +RSI=0x0000000123e58fc3 is an unknown value +RDI=0x00007fd97a17e320 is an unknown value +R8 =0x0000000000000000 is an unknown value +R9 =0x0000000000000b01 is an unknown value +R10=0x00007fd97a000000 is an unknown value +R11=0x00007fd85632535d is an unknown value +R12=0x0000000081a40000 is an unknown value +R13=0x0000000074736572 is an unknown value +R14=0x00000000000339e4 is an unknown value +R15=0x0000000000002f6d is an unknown value + + +Stack: [0x0000700017074000,0x0000700017174000], sp=0x000070001716f830, free space=1006k +Native frames: (J=compiled Java code, j=interpreted, Vv=VM code, C=native code) +C [libsystem_platform.dylib+0x1e00] _platform_memmove$VARIANT$Haswell+0x140 +C 
[libzip.dylib+0x305f] newEntry+0x321 +C [libzip.dylib+0x352d] ZIP_GetEntry2+0xd4 +C [libzip.dylib+0x2238] Java_java_util_zip_ZipFile_getEntry+0xcf +J 210 java.util.zip.ZipFile.getEntry(J[BZ)J (0 bytes) @ 0x0000000114de51ce [0x0000000114de5100+0xce] +J 1543 C2 java.util.zip.ZipFile.getEntry(Ljava/lang/String;)Ljava/util/zip/ZipEntry; (101 bytes) @ 0x00000001151f8c78 [0x00000001151f8900+0x378] +J 3002 C2 sun.misc.URLClassPath$JarLoader.getResource(Ljava/lang/String;Z)Lsun/misc/Resource; (85 bytes) @ 0x000000011569a270 [0x000000011569a0e0+0x190] +J 565 C2 sun.misc.URLClassPath.getResource(Ljava/lang/String;Z)Lsun/misc/Resource; (83 bytes) @ 0x0000000114ee693c [0x0000000114ee6860+0xdc] +J 1771 C2 java.net.URLClassLoader$1.run()Ljava/lang/Object; (5 bytes) @ 0x0000000115571e88 [0x0000000115571980+0x508] +v ~StubRoutines::call_stub +V [libjvm.dylib+0x2e3233] JavaCalls::call_helper(JavaValue*, methodHandle*, JavaCallArguments*, Thread*)+0x6a7 +V [libjvm.dylib+0x328145] JVM_DoPrivileged+0x4dd +J 256 java.security.AccessController.doPrivileged(Ljava/security/PrivilegedExceptionAction;Ljava/security/AccessControlContext;)Ljava/lang/Object; (0 bytes) @ 0x0000000114def715 [0x0000000114def640+0xd5] +J 1045 C2 java.lang.ClassLoader.loadClass(Ljava/lang/String;Z)Ljava/lang/Class; (122 bytes) @ 0x0000000115030dc4 [0x0000000115030360+0xa64] +J 4851 C2 java.lang.ClassLoader.loadClass(Ljava/lang/String;)Ljava/lang/Class; (7 bytes) @ 0x000000011544395c [0x00000001154437c0+0x19c] +v ~StubRoutines::call_stub +V [libjvm.dylib+0x2e3233] JavaCalls::call_helper(JavaValue*, methodHandle*, JavaCallArguments*, Thread*)+0x6a7 +V [libjvm.dylib+0x2e39da] JavaCalls::call_virtual(JavaValue*, KlassHandle, Symbol*, Symbol*, JavaCallArguments*, Thread*)+0x164 +V [libjvm.dylib+0x2e3b32] JavaCalls::call_virtual(JavaValue*, Handle, KlassHandle, Symbol*, Symbol*, Handle, Thread*)+0x56 +V [libjvm.dylib+0x512250] SystemDictionary::load_instance_class(Symbol*, Handle, Thread*)+0x2d8 +V 
[libjvm.dylib+0x511a0b] SystemDictionary::resolve_instance_class_or_null(Symbol*, Handle, Handle, Thread*)+0x455 +V [libjvm.dylib+0x51246c] SystemDictionary::resolve_or_null(Symbol*, Handle, Handle, Thread*)+0x106 +V [libjvm.dylib+0x512add] SystemDictionary::resolve_or_fail(Symbol*, Handle, Handle, bool, Thread*)+0x31 +V [libjvm.dylib+0x4d6a0d] SignatureStream::as_java_mirror(Handle, Handle, SignatureStream::FailureMode, Thread*)+0x2f +V [libjvm.dylib+0x50e3b7] SystemDictionary::find_method_handle_type(Symbol*, KlassHandle, Thread*)+0x259 +V [libjvm.dylib+0x50ed15] SystemDictionary::link_method_handle_constant(KlassHandle, int, KlassHandle, Symbol*, Symbol*, Thread*)+0x9f +V [libjvm.dylib+0x1e2655] ConstantPool::resolve_constant_at_impl(constantPoolHandle, int, int, Thread*)+0x561 +V [libjvm.dylib+0x1e3055] ConstantPool::resolve_possibly_cached_constant_at(int, Thread*)+0x97 +V [libjvm.dylib+0x1e2b24] ConstantPool::resolve_bootstrap_specifier_at_impl(constantPoolHandle, int, Thread*)+0x1a4 +V [libjvm.dylib+0x3a9162] ConstantPool::resolve_bootstrap_specifier_at(int, Thread*)+0x92 +V [libjvm.dylib+0x3a630a] LinkResolver::resolve_invokedynamic(CallInfo&, constantPoolHandle, int, Thread*)+0x9c +V [libjvm.dylib+0x3a8ef4] LinkResolver::resolve_invoke(CallInfo&, Handle, constantPoolHandle, int, Bytecodes::Code, Thread*)+0x340 +V [libjvm.dylib+0x2dd1df] InterpreterRuntime::resolve_invokedynamic(JavaThread*)+0x137 +j com.facebook.presto.tpch.TpchMetadata.()V+79 +v ~StubRoutines::call_stub +V [libjvm.dylib+0x2e3233] JavaCalls::call_helper(JavaValue*, methodHandle*, JavaCallArguments*, Thread*)+0x6a7 +V [libjvm.dylib+0x2b6515] InstanceKlass::call_class_initializer_impl(instanceKlassHandle, Thread*)+0xbf +V [libjvm.dylib+0x2b79e5] InstanceKlass::initialize_impl(instanceKlassHandle, Thread*)+0x4ab +V [libjvm.dylib+0x2b7c7b] InstanceKlass::initialize(Thread*)+0x3d +V [libjvm.dylib+0x2defb6] InterpreterRuntime::_new(JavaThread*, ConstantPool*, int)+0x5e +j 
com.facebook.presto.tpch.TpchConnectorFactory$1.getMetadata(Lcom/facebook/presto/spi/connector/ConnectorTransactionHandle;)Lcom/facebook/presto/spi/connector/ConnectorMetadata;+0 +j com.facebook.presto.transaction.InMemoryTransactionManager$TransactionMetadata$ConnectorTransactionMetadata.(Lcom/facebook/presto/spi/ConnectorId;Lcom/facebook/presto/spi/connector/Connector;Lcom/facebook/presto/spi/connector/ConnectorTransactionHandle;)V+57 +j com.facebook.presto.transaction.InMemoryTransactionManager$TransactionMetadata.createConnectorTransactionMetadata(Lcom/facebook/presto/spi/ConnectorId;Lcom/facebook/presto/metadata/Catalog;)Lcom/facebook/presto/transaction/InMemoryTransactionManager$TransactionMetadata$ConnectorTransactionMetadata;+17 +j com.facebook.presto.transaction.InMemoryTransactionManager$TransactionMetadata.getTransactionCatalogMetadata(Lcom/facebook/presto/spi/ConnectorId;)Lcom/facebook/presto/metadata/CatalogMetadata;+64 +j com.facebook.presto.transaction.InMemoryTransactionManager$TransactionMetadata.access$200(Lcom/facebook/presto/transaction/InMemoryTransactionManager$TransactionMetadata;Lcom/facebook/presto/spi/ConnectorId;)Lcom/facebook/presto/metadata/CatalogMetadata;+2 +j com.facebook.presto.transaction.InMemoryTransactionManager.lambda$getOptionalCatalogMetadata$1(Lcom/facebook/presto/transaction/InMemoryTransactionManager$TransactionMetadata;Lcom/facebook/presto/spi/ConnectorId;)Lcom/facebook/presto/metadata/CatalogMetadata;+2 +j com.facebook.presto.transaction.InMemoryTransactionManager$$Lambda$1743.apply(Ljava/lang/Object;)Ljava/lang/Object;+8 +J 10552 C2 java.util.Optional.map(Ljava/util/function/Function;)Ljava/util/Optional; (30 bytes) @ 0x0000000114f983d0 [0x0000000114f98360+0x70] +j com.facebook.presto.transaction.InMemoryTransactionManager.getOptionalCatalogMetadata(Lcom/facebook/presto/transaction/TransactionId;Ljava/lang/String;)Ljava/util/Optional;+22 +j 
com.facebook.presto.security.AccessControlManager.getConnectorAccessControl(Lcom/facebook/presto/transaction/TransactionId;Ljava/lang/String;)Lcom/facebook/presto/security/AccessControlManager$CatalogAccessControlEntry;+6 +j com.facebook.presto.security.AccessControlManager.checkCanSelectFromColumns(Lcom/facebook/presto/transaction/TransactionId;Lcom/facebook/presto/spi/security/Identity;Lcom/facebook/presto/metadata/QualifiedObjectName;Ljava/util/Set;)V+54 +j com.facebook.presto.security.TestAccessControlManager.lambda$testDenyCatalogAccessControl$3(Lcom/facebook/presto/security/AccessControlManager;Lcom/facebook/presto/transaction/TransactionId;)V+35 +j com.facebook.presto.security.TestAccessControlManager$$Lambda$1737.accept(Ljava/lang/Object;)V+8 +j com.facebook.presto.transaction.TransactionBuilder.lambda$execute$0(Ljava/util/function/Consumer;Lcom/facebook/presto/transaction/TransactionId;)Ljava/lang/Object;+2 +j com.facebook.presto.transaction.TransactionBuilder$$Lambda$1738.apply(Ljava/lang/Object;)Ljava/lang/Object;+8 +j com.facebook.presto.transaction.TransactionBuilder.execute(Ljava/util/function/Function;)Ljava/lang/Object;+33 +j com.facebook.presto.transaction.TransactionBuilder.execute(Ljava/util/function/Consumer;)V+14 +j com.facebook.presto.security.TestAccessControlManager.testDenyCatalogAccessControl()V+83 +v ~StubRoutines::call_stub +V [libjvm.dylib+0x2e3233] JavaCalls::call_helper(JavaValue*, methodHandle*, JavaCallArguments*, Thread*)+0x6a7 +V [libjvm.dylib+0x4ab524] Reflection::invoke(instanceKlassHandle, methodHandle, Handle, bool, objArrayHandle, BasicType, objArrayHandle, bool, Thread*)+0xe12 +V [libjvm.dylib+0x4aba58] Reflection::invoke_method(oopDesc*, Handle, objArrayHandle, Thread*)+0x16c +V [libjvm.dylib+0x31f742] JVM_InvokeMethod+0x1a0 +J 5911 sun.reflect.NativeMethodAccessorImpl.invoke0(Ljava/lang/reflect/Method;Ljava/lang/Object;[Ljava/lang/Object;)Ljava/lang/Object; (0 bytes) @ 0x0000000114f0c6ad [0x0000000114f0c5c0+0xed] +J 5193 
C1 sun.reflect.NativeMethodAccessorImpl.invoke(Ljava/lang/Object;[Ljava/lang/Object;)Ljava/lang/Object; (104 bytes) @ 0x0000000115b109d4 [0x0000000115b0f800+0x11d4] +J 8571 C2 java.lang.reflect.Method.invoke(Ljava/lang/Object;[Ljava/lang/Object;)Ljava/lang/Object; (62 bytes) @ 0x000000011609b770 [0x000000011609b6c0+0xb0] +J 12476 C1 org.testng.internal.MethodInvocationHelper.invokeMethod(Ljava/lang/reflect/Method;Ljava/lang/Object;[Ljava/lang/Object;)Ljava/lang/Object; (246 bytes) @ 0x0000000116adaf84 [0x0000000116ad8740+0x2844] +J 13917 C1 org.testng.internal.Invoker.invokeMethod(Ljava/lang/Object;Lorg/testng/ITestNGMethod;[Ljava/lang/Object;ILorg/testng/xml/XmlSuite;Ljava/util/Map;Lorg/testng/ITestClass;[Lorg/testng/ITestNGMethod;[Lorg/testng/ITestNGMethod;Lorg/testng/internal/ConfigurationGroupMethods;Lorg/testng/internal/Invoker$FailureContext;)Lorg/testng/ITestResult; (1164 bytes) @ 0x0000000116ca16f4 [0x0000000116c9eb80+0x2b74] +J 13898 C1 org.testng.internal.Invoker.invokeTestMethod(Ljava/lang/Object;Lorg/testng/ITestNGMethod;[Ljava/lang/Object;ILorg/testng/xml/XmlSuite;Ljava/util/Map;Lorg/testng/ITestClass;[Lorg/testng/ITestNGMethod;[Lorg/testng/ITestNGMethod;Lorg/testng/internal/ConfigurationGroupMethods;Lorg/testng/internal/Invoker$FailureContext;)Lorg/testng/ITestResult; (37 bytes) @ 0x0000000116afb344 [0x0000000116afb0e0+0x264] +J 12057 C1 org.testng.internal.Invoker.invokeTestMethods(Lorg/testng/ITestNGMethod;Lorg/testng/xml/XmlSuite;Ljava/util/Map;Lorg/testng/internal/ConfigurationGroupMethods;Ljava/lang/Object;Lorg/testng/ITestContext;)Ljava/util/List; (1271 bytes) @ 0x000000011695afcc [0x0000000116957820+0x37ac] +J 12056 C1 org.testng.internal.TestMethodWorker.invokeTestMethods(Lorg/testng/ITestNGMethod;Ljava/lang/Object;Lorg/testng/ITestContext;)V (44 bytes) @ 0x0000000116953a44 [0x0000000116953960+0xe4] +J 19701 C1 org.testng.internal.TestMethodWorker.run()V (94 bytes) @ 0x0000000117efd80c [0x0000000117efd4a0+0x36c] +j 
java.util.concurrent.ThreadPoolExecutor.runWorker(Ljava/util/concurrent/ThreadPoolExecutor$Worker;)V+95 +j java.util.concurrent.ThreadPoolExecutor$Worker.run()V+5 +j java.lang.Thread.run()V+11 +v ~StubRoutines::call_stub +V [libjvm.dylib+0x2e3233] JavaCalls::call_helper(JavaValue*, methodHandle*, JavaCallArguments*, Thread*)+0x6a7 +V [libjvm.dylib+0x2e39da] JavaCalls::call_virtual(JavaValue*, KlassHandle, Symbol*, Symbol*, JavaCallArguments*, Thread*)+0x164 +V [libjvm.dylib+0x2e3b86] JavaCalls::call_virtual(JavaValue*, Handle, KlassHandle, Symbol*, Symbol*, Thread*)+0x4a +V [libjvm.dylib+0x322538] thread_entry(JavaThread*, Thread*)+0x7c +V [libjvm.dylib+0x53fd15] JavaThread::thread_main_inner()+0x9b +V [libjvm.dylib+0x5413fe] JavaThread::run()+0x1c0 +V [libjvm.dylib+0x463efa] java_start(Thread*)+0xf6 +C [libsystem_pthread.dylib+0x32eb] _pthread_body+0x7e +C [libsystem_pthread.dylib+0x6249] _pthread_start+0x42 +C [libsystem_pthread.dylib+0x240d] thread_start+0xd +C 0x0000000000000000 + +Java frames: (J=compiled Java code, j=interpreted, Vv=VM code) +J 210 java.util.zip.ZipFile.getEntry(J[BZ)J (0 bytes) @ 0x0000000114de5158 [0x0000000114de5100+0x58] +J 1543 C2 java.util.zip.ZipFile.getEntry(Ljava/lang/String;)Ljava/util/zip/ZipEntry; (101 bytes) @ 0x00000001151f8c78 [0x00000001151f8900+0x378] +J 3002 C2 sun.misc.URLClassPath$JarLoader.getResource(Ljava/lang/String;Z)Lsun/misc/Resource; (85 bytes) @ 0x000000011569a270 [0x000000011569a0e0+0x190] +J 565 C2 sun.misc.URLClassPath.getResource(Ljava/lang/String;Z)Lsun/misc/Resource; (83 bytes) @ 0x0000000114ee693c [0x0000000114ee6860+0xdc] +J 1771 C2 java.net.URLClassLoader$1.run()Ljava/lang/Object; (5 bytes) @ 0x0000000115571e88 [0x0000000115571980+0x508] +v ~StubRoutines::call_stub +J 256 java.security.AccessController.doPrivileged(Ljava/security/PrivilegedExceptionAction;Ljava/security/AccessControlContext;)Ljava/lang/Object; (0 bytes) @ 0x0000000114def6a3 [0x0000000114def640+0x63] +J 1045 C2 
java.lang.ClassLoader.loadClass(Ljava/lang/String;Z)Ljava/lang/Class; (122 bytes) @ 0x0000000115030dc4 [0x0000000115030360+0xa64] +J 4851 C2 java.lang.ClassLoader.loadClass(Ljava/lang/String;)Ljava/lang/Class; (7 bytes) @ 0x000000011544395c [0x00000001154437c0+0x19c] +v ~StubRoutines::call_stub +j com.facebook.presto.tpch.TpchMetadata.()V+79 +v ~StubRoutines::call_stub +j com.facebook.presto.tpch.TpchConnectorFactory$1.getMetadata(Lcom/facebook/presto/spi/connector/ConnectorTransactionHandle;)Lcom/facebook/presto/spi/connector/ConnectorMetadata;+0 +j com.facebook.presto.transaction.InMemoryTransactionManager$TransactionMetadata$ConnectorTransactionMetadata.(Lcom/facebook/presto/spi/ConnectorId;Lcom/facebook/presto/spi/connector/Connector;Lcom/facebook/presto/spi/connector/ConnectorTransactionHandle;)V+57 +j com.facebook.presto.transaction.InMemoryTransactionManager$TransactionMetadata.createConnectorTransactionMetadata(Lcom/facebook/presto/spi/ConnectorId;Lcom/facebook/presto/metadata/Catalog;)Lcom/facebook/presto/transaction/InMemoryTransactionManager$TransactionMetadata$ConnectorTransactionMetadata;+17 +j com.facebook.presto.transaction.InMemoryTransactionManager$TransactionMetadata.getTransactionCatalogMetadata(Lcom/facebook/presto/spi/ConnectorId;)Lcom/facebook/presto/metadata/CatalogMetadata;+64 +j com.facebook.presto.transaction.InMemoryTransactionManager$TransactionMetadata.access$200(Lcom/facebook/presto/transaction/InMemoryTransactionManager$TransactionMetadata;Lcom/facebook/presto/spi/ConnectorId;)Lcom/facebook/presto/metadata/CatalogMetadata;+2 +j com.facebook.presto.transaction.InMemoryTransactionManager.lambda$getOptionalCatalogMetadata$1(Lcom/facebook/presto/transaction/InMemoryTransactionManager$TransactionMetadata;Lcom/facebook/presto/spi/ConnectorId;)Lcom/facebook/presto/metadata/CatalogMetadata;+2 +j com.facebook.presto.transaction.InMemoryTransactionManager$$Lambda$1743.apply(Ljava/lang/Object;)Ljava/lang/Object;+8 +J 10552 C2 
java.util.Optional.map(Ljava/util/function/Function;)Ljava/util/Optional; (30 bytes) @ 0x0000000114f983d0 [0x0000000114f98360+0x70] +j com.facebook.presto.transaction.InMemoryTransactionManager.getOptionalCatalogMetadata(Lcom/facebook/presto/transaction/TransactionId;Ljava/lang/String;)Ljava/util/Optional;+22 +j com.facebook.presto.security.AccessControlManager.getConnectorAccessControl(Lcom/facebook/presto/transaction/TransactionId;Ljava/lang/String;)Lcom/facebook/presto/security/AccessControlManager$CatalogAccessControlEntry;+6 +j com.facebook.presto.security.AccessControlManager.checkCanSelectFromColumns(Lcom/facebook/presto/transaction/TransactionId;Lcom/facebook/presto/spi/security/Identity;Lcom/facebook/presto/metadata/QualifiedObjectName;Ljava/util/Set;)V+54 +j com.facebook.presto.security.TestAccessControlManager.lambda$testDenyCatalogAccessControl$3(Lcom/facebook/presto/security/AccessControlManager;Lcom/facebook/presto/transaction/TransactionId;)V+35 +j com.facebook.presto.security.TestAccessControlManager$$Lambda$1737.accept(Ljava/lang/Object;)V+8 +j com.facebook.presto.transaction.TransactionBuilder.lambda$execute$0(Ljava/util/function/Consumer;Lcom/facebook/presto/transaction/TransactionId;)Ljava/lang/Object;+2 +j com.facebook.presto.transaction.TransactionBuilder$$Lambda$1738.apply(Ljava/lang/Object;)Ljava/lang/Object;+8 +j com.facebook.presto.transaction.TransactionBuilder.execute(Ljava/util/function/Function;)Ljava/lang/Object;+33 +j com.facebook.presto.transaction.TransactionBuilder.execute(Ljava/util/function/Consumer;)V+14 +j com.facebook.presto.security.TestAccessControlManager.testDenyCatalogAccessControl()V+83 +v ~StubRoutines::call_stub +J 5911 sun.reflect.NativeMethodAccessorImpl.invoke0(Ljava/lang/reflect/Method;Ljava/lang/Object;[Ljava/lang/Object;)Ljava/lang/Object; (0 bytes) @ 0x0000000114f0c637 [0x0000000114f0c5c0+0x77] +J 5193 C1 sun.reflect.NativeMethodAccessorImpl.invoke(Ljava/lang/Object;[Ljava/lang/Object;)Ljava/lang/Object; (104 
bytes) @ 0x0000000115b109d4 [0x0000000115b0f800+0x11d4] +J 8571 C2 java.lang.reflect.Method.invoke(Ljava/lang/Object;[Ljava/lang/Object;)Ljava/lang/Object; (62 bytes) @ 0x000000011609b770 [0x000000011609b6c0+0xb0] +J 12476 C1 org.testng.internal.MethodInvocationHelper.invokeMethod(Ljava/lang/reflect/Method;Ljava/lang/Object;[Ljava/lang/Object;)Ljava/lang/Object; (246 bytes) @ 0x0000000116adaf84 [0x0000000116ad8740+0x2844] +J 13917 C1 org.testng.internal.Invoker.invokeMethod(Ljava/lang/Object;Lorg/testng/ITestNGMethod;[Ljava/lang/Object;ILorg/testng/xml/XmlSuite;Ljava/util/Map;Lorg/testng/ITestClass;[Lorg/testng/ITestNGMethod;[Lorg/testng/ITestNGMethod;Lorg/testng/internal/ConfigurationGroupMethods;Lorg/testng/internal/Invoker$FailureContext;)Lorg/testng/ITestResult; (1164 bytes) @ 0x0000000116ca16f4 [0x0000000116c9eb80+0x2b74] +J 13898 C1 org.testng.internal.Invoker.invokeTestMethod(Ljava/lang/Object;Lorg/testng/ITestNGMethod;[Ljava/lang/Object;ILorg/testng/xml/XmlSuite;Ljava/util/Map;Lorg/testng/ITestClass;[Lorg/testng/ITestNGMethod;[Lorg/testng/ITestNGMethod;Lorg/testng/internal/ConfigurationGroupMethods;Lorg/testng/internal/Invoker$FailureContext;)Lorg/testng/ITestResult; (37 bytes) @ 0x0000000116afb344 [0x0000000116afb0e0+0x264] +J 12057 C1 org.testng.internal.Invoker.invokeTestMethods(Lorg/testng/ITestNGMethod;Lorg/testng/xml/XmlSuite;Ljava/util/Map;Lorg/testng/internal/ConfigurationGroupMethods;Ljava/lang/Object;Lorg/testng/ITestContext;)Ljava/util/List; (1271 bytes) @ 0x000000011695afcc [0x0000000116957820+0x37ac] +J 12056 C1 org.testng.internal.TestMethodWorker.invokeTestMethods(Lorg/testng/ITestNGMethod;Ljava/lang/Object;Lorg/testng/ITestContext;)V (44 bytes) @ 0x0000000116953a44 [0x0000000116953960+0xe4] +J 19701 C1 org.testng.internal.TestMethodWorker.run()V (94 bytes) @ 0x0000000117efd80c [0x0000000117efd4a0+0x36c] +j java.util.concurrent.ThreadPoolExecutor.runWorker(Ljava/util/concurrent/ThreadPoolExecutor$Worker;)V+95 +j 
java.util.concurrent.ThreadPoolExecutor$Worker.run()V+5 +j java.lang.Thread.run()V+11 +v ~StubRoutines::call_stub + +--------------- P R O C E S S --------------- + +Java Threads: ( => current thread ) + 0x00007fd97d0bb000 JavaThread "test-executor-1" daemon [_thread_blocked, id=170771, stack(0x0000700017e9e000,0x0000700017f9e000)] + 0x00007fd97a4a1800 JavaThread "test-executor-0" daemon [_thread_blocked, id=70167, stack(0x0000700017d9b000,0x0000700017e9b000)] + 0x00007fd97747b000 JavaThread "test-executor-1" daemon [_thread_blocked, id=167183, stack(0x0000700017c98000,0x0000700017d98000)] + 0x00007fd97b2e3000 JavaThread "test-executor-0" daemon [_thread_blocked, id=137231, stack(0x0000700017b95000,0x0000700017c95000)] + 0x00007fd97a904800 JavaThread "test-executor-1" daemon [_thread_blocked, id=147971, stack(0x000070001b031000,0x000070001b131000)] + 0x00007fd97aa5d800 JavaThread "test-executor-0" daemon [_thread_blocked, id=59155, stack(0x0000700017a92000,0x0000700017b92000)] + 0x00007fd97b4d7000 JavaThread "test-executor-1" daemon [_thread_blocked, id=137743, stack(0x000070001798f000,0x0000700017a8f000)] + 0x00007fd976354000 JavaThread "test-executor-0" daemon [_thread_blocked, id=166671, stack(0x000070000ff24000,0x0000700010024000)] + 0x00007fd97a903800 JavaThread "test-executor-1" daemon [_thread_blocked, id=157187, stack(0x000070001af2e000,0x000070001b02e000)] + 0x00007fd97a903000 JavaThread "test-executor-0" daemon [_thread_blocked, id=165899, stack(0x000070001788c000,0x000070001798c000)] + 0x00007fd976351000 JavaThread "test-executor-1" daemon [_thread_blocked, id=133907, stack(0x0000700017789000,0x0000700017889000)] + 0x00007fd977067800 JavaThread "test-executor-0" daemon [_thread_blocked, id=171283, stack(0x0000700015523000,0x0000700015623000)] + 0x00007fd97d0ba800 JavaThread "test-executor-1" daemon [_thread_blocked, id=157443, stack(0x000070001ae2b000,0x000070001af2b000)] + 0x00007fd97ca6a800 JavaThread "test-executor-0" daemon [_thread_blocked, 
id=157699, stack(0x000070001ad28000,0x000070001ae28000)] + 0x00007fd97a787000 JavaThread "test-executor-1" daemon [_thread_blocked, id=146691, stack(0x000070001ac25000,0x000070001ad25000)] + 0x00007fd97a786800 JavaThread "test-executor-0" daemon [_thread_blocked, id=146179, stack(0x000070001ab22000,0x000070001ac22000)] + 0x00007fd97d21e000 JavaThread "test-executor-0" daemon [_thread_blocked, id=158211, stack(0x000070001aa1f000,0x000070001ab1f000)] + 0x00007fd97ca1b000 JavaThread "test-executor-1" daemon [_thread_blocked, id=145923, stack(0x000070001a91c000,0x000070001aa1c000)] + 0x00007fd97d00a000 JavaThread "test-executor-0" daemon [_thread_blocked, id=145411, stack(0x000070001a819000,0x000070001a919000)] + 0x00007fd974260800 JavaThread "test-executor-1" daemon [_thread_blocked, id=145155, stack(0x000070001a716000,0x000070001a816000)] + 0x00007fd977357000 JavaThread "test-executor-0" daemon [_thread_blocked, id=159235, stack(0x000070001a613000,0x000070001a713000)] + 0x00007fd97a062800 JavaThread "test-executor-0" daemon [_thread_blocked, id=144899, stack(0x000070001a510000,0x000070001a610000)] + 0x00007fd977b18000 JavaThread "test-executor-1" daemon [_thread_blocked, id=159747, stack(0x000070001a40d000,0x000070001a50d000)] + 0x00007fd97ca1a800 JavaThread "test-executor-0" daemon [_thread_blocked, id=144387, stack(0x000070001a30a000,0x000070001a40a000)] + 0x00007fd97aac8800 JavaThread "test-executor-0" daemon [_thread_blocked, id=144131, stack(0x000070001a207000,0x000070001a307000)] + 0x00007fd97a7a6800 JavaThread "test-executor-1" daemon [_thread_blocked, id=160515, stack(0x000070001a104000,0x000070001a204000)] + 0x00007fd97a7a6000 JavaThread "test-executor-0" daemon [_thread_blocked, id=143619, stack(0x000070001a001000,0x000070001a101000)] + 0x00007fd978b8d000 JavaThread "test-executor-0" daemon [_thread_blocked, id=143363, stack(0x0000700019efe000,0x0000700019ffe000)] + 0x00007fd978a83000 JavaThread "test-executor-1" daemon [_thread_blocked, id=143107, 
stack(0x0000700019dfb000,0x0000700019efb000)] + 0x00007fd9763c4000 JavaThread "test-executor-0" daemon [_thread_blocked, id=161539, stack(0x0000700019cf8000,0x0000700019df8000)] + 0x00007fd97727e800 JavaThread "test-executor-1" daemon [_thread_blocked, id=142339, stack(0x0000700019bf5000,0x0000700019cf5000)] + 0x00007fd97727e000 JavaThread "test-executor-0" daemon [_thread_blocked, id=161795, stack(0x0000700019af2000,0x0000700019bf2000)] + 0x00007fd97469d800 JavaThread "test-executor-0" daemon [_thread_blocked, id=141571, stack(0x00007000199ef000,0x0000700019aef000)] + 0x00007fd977679000 JavaThread "test-executor-0" daemon [_thread_blocked, id=141059, stack(0x00007000198ec000,0x00007000199ec000)] + 0x00007fd97ccda000 JavaThread "test-executor-0" daemon [_thread_blocked, id=140803, stack(0x00007000197e9000,0x00007000198e9000)] + 0x00007fd974265000 JavaThread "test-executor-1" daemon [_thread_blocked, id=162563, stack(0x00007000196e6000,0x00007000197e6000)] + 0x00007fd97d001000 JavaThread "test-executor-0" daemon [_thread_blocked, id=162819, stack(0x00007000195e3000,0x00007000196e3000)] + 0x00007fd97ccc5000 JavaThread "test-executor-1" daemon [_thread_blocked, id=140291, stack(0x00007000194e0000,0x00007000195e0000)] + 0x00007fd978b0b800 JavaThread "test-executor-0" daemon [_thread_blocked, id=163331, stack(0x00007000193dd000,0x00007000194dd000)] + 0x00007fd97ab49000 JavaThread "test-executor-1" daemon [_thread_blocked, id=139523, stack(0x00007000192da000,0x00007000193da000)] + 0x00007fd979b61000 JavaThread "test-executor-0" daemon [_thread_blocked, id=163587, stack(0x00007000191d7000,0x00007000192d7000)] + 0x00007fd97aac5000 JavaThread "test-executor-1" daemon [_thread_blocked, id=163843, stack(0x00007000190d4000,0x00007000191d4000)] + 0x00007fd977fd0000 JavaThread "test-executor-0" daemon [_thread_blocked, id=164355, stack(0x0000700018fd1000,0x00007000190d1000)] + 0x00007fd976589800 JavaThread "test-executor-1" daemon [_thread_blocked, id=138755, 
stack(0x0000700018ece000,0x0000700018fce000)] + 0x00007fd977041000 JavaThread "test-executor-0" daemon [_thread_blocked, id=165123, stack(0x0000700018dcb000,0x0000700018ecb000)] + 0x00007fd9772ac800 JavaThread "test-executor-1" daemon [_thread_blocked, id=165379, stack(0x0000700018cc8000,0x0000700018dc8000)] + 0x00007fd97a27a000 JavaThread "test-executor-0" daemon [_thread_blocked, id=169747, stack(0x0000700014d0b000,0x0000700014e0b000)] + 0x00007fd97a279800 JavaThread "http-worker-326" [_thread_blocked, id=168979, stack(0x0000700010027000,0x0000700010127000)] + 0x00007fd97469c800 JavaThread "http-worker-325" [_thread_blocked, id=113163, stack(0x0000700018bc5000,0x0000700018cc5000)] + 0x00007fd97b65f800 JavaThread "test-executor-1" daemon [_thread_blocked, id=169487, stack(0x0000700018ac2000,0x0000700018bc2000)] + 0x00007fd97ccd5800 JavaThread "test-executor-0" daemon [_thread_blocked, id=72463, stack(0x00007000189bf000,0x0000700018abf000)] + 0x00007fd9775bb000 JavaThread "http-worker-276" [_thread_in_native, id=170251, stack(0x000070001218a000,0x000070001228a000)] + 0x00007fd97cd65800 JavaThread "http-worker-251" [_thread_in_native, id=115723, stack(0x000070000f912000,0x000070000fa12000)] + 0x00007fd977658000 JavaThread "http-worker-238" [_thread_blocked, id=90887, stack(0x000070000c77f000,0x000070000c87f000)] + 0x00007fd97951d000 JavaThread "local-query-runner-scheduler-1" daemon [_thread_blocked, id=171779, stack(0x0000700017686000,0x0000700017786000)] + 0x00007fd97732b800 JavaThread "Announcer-4" daemon [_thread_blocked, id=172035, stack(0x0000700017583000,0x0000700017683000)] + 0x00007fd9774c2000 JavaThread "Announcer-4" daemon [_thread_blocked, id=172547, stack(0x0000700017480000,0x0000700017580000)] + 0x00007fd97a95e800 JavaThread "Announcer-2" daemon [_thread_blocked, id=132867, stack(0x000070001737d000,0x000070001747d000)] + 0x00007fd97784e000 JavaThread "Announcer-3" daemon [_thread_blocked, id=173059, stack(0x000070001727a000,0x000070001737a000)] + 
0x00007fd975c5a800 JavaThread "pool-112-thread-2" [_thread_in_Java, id=132355, stack(0x0000700017177000,0x0000700017277000)] +=>0x00007fd978bc8800 JavaThread "pool-112-thread-1" [_thread_in_native, id=173315, stack(0x0000700017074000,0x0000700017174000)] + 0x00007fd977fd6000 JavaThread "SplitRunner-23-230" [_thread_blocked, id=173571, stack(0x0000700016f71000,0x0000700017071000)] + 0x00007fd977fd5800 JavaThread "SplitRunner-22-229" [_thread_blocked, id=131331, stack(0x0000700016e6e000,0x0000700016f6e000)] + 0x00007fd9796d8800 JavaThread "SplitRunner-21-228" [_thread_blocked, id=174083, stack(0x0000700016d6b000,0x0000700016e6b000)] + 0x00007fd979292000 JavaThread "SplitRunner-20-227" [_thread_blocked, id=131075, stack(0x0000700016c68000,0x0000700016d68000)] + 0x00007fd97b4de000 JavaThread "SplitRunner-19-226" [_thread_blocked, id=107779, stack(0x0000700016b65000,0x0000700016c65000)] + 0x00007fd9773f5000 JavaThread "SplitRunner-18-225" [_thread_blocked, id=108291, stack(0x0000700016a62000,0x0000700016b62000)] + 0x00007fd97a06f800 JavaThread "SplitRunner-17-224" [_thread_blocked, id=107267, stack(0x000070001695f000,0x0000700016a5f000)] + 0x00007fd97b1b0000 JavaThread "SplitRunner-16-223" [_thread_blocked, id=106755, stack(0x000070001685c000,0x000070001695c000)] + 0x00007fd97aabb000 JavaThread "Announcer-3" daemon [_thread_blocked, id=106243, stack(0x0000700016759000,0x0000700016859000)] + 0x00007fd977fd2800 JavaThread "Announcer-2" daemon [_thread_blocked, id=105987, stack(0x0000700016656000,0x0000700016756000)] + 0x00007fd9765cf800 JavaThread "query-management-4" [_thread_blocked, id=105731, stack(0x0000700016553000,0x0000700016653000)] + 0x00007fd977e76000 JavaThread "query-management-3" [_thread_blocked, id=105475, stack(0x0000700016450000,0x0000700016550000)] + 0x00007fd977e75000 JavaThread "query-management-2" [_thread_blocked, id=105219, stack(0x000070001634d000,0x000070001644d000)] + 0x00007fd975eb3800 JavaThread "http-client-anonymous2-scheduler" daemon 
[_thread_blocked, id=104963, stack(0x000070001624a000,0x000070001634a000)] + 0x00007fd97a06a000 JavaThread "http-client-anonymous2-216" daemon [_thread_blocked, id=110339, stack(0x0000700016147000,0x0000700016247000)] + 0x00007fd977e72800 JavaThread "http-client-anonymous2-215" daemon [_thread_blocked, id=104451, stack(0x0000700016044000,0x0000700016144000)] + 0x00007fd979201000 JavaThread "http-client-anonymous2-214" daemon [_thread_blocked, id=104195, stack(0x0000700015f41000,0x0000700016041000)] + 0x00007fd979200000 JavaThread "http-client-anonymous2-213" daemon [_thread_blocked, id=111107, stack(0x0000700015e3e000,0x0000700015f3e000)] + 0x00007fd97a94c000 JavaThread "http-client-anonymous2-212" daemon [_thread_blocked, id=103683, stack(0x0000700015d3b000,0x0000700015e3b000)] + 0x00007fd97a069800 JavaThread "http-client-anonymous2-211" daemon [_thread_blocked, id=111619, stack(0x0000700015c38000,0x0000700015d38000)] + 0x00007fd97935d800 JavaThread "http-client-anonymous2-210" daemon [_thread_in_native, id=103427, stack(0x0000700015b35000,0x0000700015c35000)] + 0x00007fd975ab5800 JavaThread "http-client-anonymous2-209" daemon [_thread_in_native, id=102915, stack(0x0000700015a32000,0x0000700015b32000)] + 0x00007fd976587000 JavaThread "Announcer-1" daemon [_thread_blocked, id=102659, stack(0x000070001592f000,0x0000700015a2f000)] + 0x00007fd976512000 JavaThread "Announcer-0" daemon [_thread_blocked, id=112387, stack(0x000070001582c000,0x000070001592c000)] + 0x00007fd97a5ad800 JavaThread "VM Pause Meter" daemon [_thread_blocked, id=112643, stack(0x0000700015729000,0x0000700015829000)] + 0x00007fd97a2a2800 JavaThread "task-management-4" [_thread_blocked, id=101891, stack(0x0000700015626000,0x0000700015726000)] + 0x00007fd975c5f000 JavaThread "Scheduler-1292576622" [_thread_blocked, id=113411, stack(0x0000700015420000,0x0000700015520000)] + 0x00007fd97a94b000 JavaThread "http-worker-201" [_thread_blocked, id=100867, stack(0x000070001531d000,0x000070001541d000)] + 
0x00007fd9765c9000 JavaThread "http-worker-200" [_thread_blocked, id=113923, stack(0x000070001521a000,0x000070001531a000)] + 0x00007fd97acba000 JavaThread "http-worker-199-acceptor-0@583c8e5b-http@2337bf27{HTTP/1.1,[http/1.1, h2c]}{127.0.0.1:65313}" [_thread_in_native, id=114435, stack(0x0000700015117000,0x0000700015217000)] + 0x00007fd97749a000 JavaThread "http-worker-198" [_thread_in_native, id=100355, stack(0x0000700015014000,0x0000700015114000)] + 0x00007fd97b0e1800 JavaThread "http-worker-197" [_thread_in_native, id=114947, stack(0x0000700014f11000,0x0000700015011000)] + 0x00007fd9765c8000 JavaThread "task-management-3" [_thread_blocked, id=100099, stack(0x0000700014e0e000,0x0000700014f0e000)] + 0x00007fd978822800 JavaThread "http-worker-194" [_thread_in_native, id=99587, stack(0x0000700014c08000,0x0000700014d08000)] + 0x00007fd97a29c800 JavaThread "task-management-2" [_thread_blocked, id=116227, stack(0x0000700014b05000,0x0000700014c05000)] + 0x00007fd978821800 JavaThread "task-management-1" [_thread_blocked, id=99075, stack(0x0000700014a02000,0x0000700014b02000)] + 0x00007fd978821000 JavaThread "task-management-0" [_thread_blocked, id=98819, stack(0x00007000148ff000,0x00007000149ff000)] + 0x00007fd97b0dc000 JavaThread "SplitRunner-15-190" [_thread_blocked, id=98307, stack(0x00007000147fc000,0x00007000148fc000)] + 0x00007fd977498800 JavaThread "SplitRunner-14-189" [_thread_blocked, id=97795, stack(0x00007000146f9000,0x00007000147f9000)] + 0x00007fd97b0db800 JavaThread "SplitRunner-13-188" [_thread_blocked, id=116995, stack(0x00007000145f6000,0x00007000146f6000)] + 0x00007fd97a997000 JavaThread "SplitRunner-12-187" [_thread_blocked, id=97539, stack(0x00007000144f3000,0x00007000145f3000)] + 0x00007fd978820000 JavaThread "http-client-workerInfo-scheduler" daemon [_thread_blocked, id=117507, stack(0x00007000143f0000,0x00007000144f0000)] + 0x00007fd97b3b4800 JavaThread "http-client-workerInfo-185" daemon [_thread_blocked, id=97027, 
stack(0x00007000142ed000,0x00007000143ed000)] + 0x00007fd977495800 JavaThread "http-client-workerInfo-184" daemon [_thread_blocked, id=118275, stack(0x00007000141ea000,0x00007000142ea000)] + 0x00007fd977acc000 JavaThread "http-client-workerInfo-183" daemon [_thread_blocked, id=96515, stack(0x00007000140e7000,0x00007000141e7000)] + 0x00007fd978807000 JavaThread "http-client-workerInfo-182" daemon [_thread_blocked, id=96259, stack(0x0000700013fe4000,0x00007000140e4000)] + 0x00007fd975ab2800 JavaThread "http-client-workerInfo-181" daemon [_thread_blocked, id=119043, stack(0x0000700013ee1000,0x0000700013fe1000)] + 0x00007fd978806800 JavaThread "http-client-workerInfo-180" daemon [_thread_blocked, id=96003, stack(0x0000700013dde000,0x0000700013ede000)] + 0x00007fd978805800 JavaThread "http-client-workerInfo-179" daemon [_thread_in_native, id=95491, stack(0x0000700013cdb000,0x0000700013ddb000)] + 0x00007fd977acb800 JavaThread "http-client-workerInfo-178" daemon [_thread_in_native, id=119811, stack(0x0000700013bd8000,0x0000700013cd8000)] + 0x00007fd97935b800 JavaThread "query-purger" [_thread_blocked, id=120067, stack(0x0000700013ad5000,0x0000700013bd5000)] + 0x00007fd97a29b800 JavaThread "http-client-exchange-scheduler" daemon [_thread_blocked, id=94979, stack(0x00007000139d2000,0x0000700013ad2000)] + 0x00007fd97a996800 JavaThread "http-client-exchange-175" daemon [_thread_blocked, id=94723, stack(0x00007000138cf000,0x00007000139cf000)] + 0x00007fd978805000 JavaThread "http-client-exchange-174" daemon [_thread_blocked, id=121091, stack(0x00007000137cc000,0x00007000138cc000)] + 0x00007fd977495000 JavaThread "http-client-exchange-173" daemon [_thread_blocked, id=94211, stack(0x00007000136c9000,0x00007000137c9000)] + 0x00007fd9798e1000 JavaThread "http-client-exchange-172" daemon [_thread_blocked, id=93955, stack(0x00007000135c6000,0x00007000136c6000)] + 0x00007fd97935a800 JavaThread "http-client-exchange-171" daemon [_thread_blocked, id=121859, 
stack(0x00007000134c3000,0x00007000135c3000)] + 0x00007fd975aad800 JavaThread "http-client-exchange-170" daemon [_thread_blocked, id=93699, stack(0x00007000133c0000,0x00007000134c0000)] + 0x00007fd97a34c000 JavaThread "http-client-exchange-169" daemon [_thread_in_native, id=122627, stack(0x00007000132bd000,0x00007000133bd000)] + 0x00007fd9793fe800 JavaThread "http-client-exchange-168" daemon [_thread_in_native, id=93443, stack(0x00007000131ba000,0x00007000132ba000)] + 0x00007fd97425a000 JavaThread "query-management-1" [_thread_blocked, id=93187, stack(0x00007000130b7000,0x00007000131b7000)] + 0x00007fd97a34b000 JavaThread "query-management-0" [_thread_blocked, id=123395, stack(0x0000700012fb4000,0x00007000130b4000)] + 0x00007fd97a34a800 JavaThread "http-client-scheduler-scheduler" daemon [_thread_blocked, id=123907, stack(0x0000700012eb1000,0x0000700012fb1000)] + 0x00007fd9798e0800 JavaThread "http-client-scheduler-164" daemon [_thread_blocked, id=124419, stack(0x0000700012dae000,0x0000700012eae000)] + 0x00007fd97b3b2000 JavaThread "http-client-scheduler-163" daemon [_thread_blocked, id=124675, stack(0x0000700012cab000,0x0000700012dab000)] + 0x00007fd97b3b1000 JavaThread "http-client-scheduler-162" daemon [_thread_blocked, id=124931, stack(0x0000700012ba8000,0x0000700012ca8000)] + 0x00007fd97b3b0800 JavaThread "http-client-scheduler-161" daemon [_thread_blocked, id=125187, stack(0x0000700012aa5000,0x0000700012ba5000)] + 0x00007fd97b1c0000 JavaThread "http-client-scheduler-160" daemon [_thread_blocked, id=125699, stack(0x00007000129a2000,0x0000700012aa2000)] + 0x00007fd97935a000 JavaThread "http-client-scheduler-159" daemon [_thread_blocked, id=126211, stack(0x000070001289f000,0x000070001299f000)] + 0x00007fd977625000 JavaThread "http-client-scheduler-158" daemon [_thread_in_native, id=91651, stack(0x000070001279c000,0x000070001289c000)] + 0x00007fd97aaa2800 JavaThread "http-client-scheduler-157" daemon [_thread_in_native, id=126723, 
stack(0x0000700012699000,0x0000700012799000)] + 0x00007fd975aad000 JavaThread "FinalizerService" daemon [_thread_blocked, id=91395, stack(0x0000700012596000,0x0000700012696000)] + 0x00007fd979714800 JavaThread "node-monitor-0" [_thread_blocked, id=127491, stack(0x0000700012493000,0x0000700012593000)] + 0x00007fd979714000 JavaThread "ResourceGroupManager" daemon [_thread_blocked, id=128003, stack(0x0000700012390000,0x0000700012490000)] + 0x00007fd974038800 JavaThread "node-state-poller-0" [_thread_blocked, id=128515, stack(0x000070001228d000,0x000070001238d000)] + 0x00007fd975aac000 JavaThread "http-client-node-manager-scheduler" daemon [_thread_blocked, id=128771, stack(0x0000700012087000,0x0000700012187000)] + 0x00007fd97b1bf000 JavaThread "http-client-node-manager-150" daemon [_thread_blocked, id=90371, stack(0x0000700011f84000,0x0000700012084000)] + 0x00007fd975aab800 JavaThread "http-client-node-manager-149" daemon [_thread_in_native, id=129539, stack(0x0000700011e81000,0x0000700011f81000)] + 0x00007fd978804000 JavaThread "http-client-node-manager-148" daemon [_thread_blocked, id=129795, stack(0x0000700011d7e000,0x0000700011e7e000)] + 0x00007fd97a8e9000 JavaThread "http-client-node-manager-147" daemon [_thread_blocked, id=89859, stack(0x0000700011c7b000,0x0000700011d7b000)] + 0x00007fd978803800 JavaThread "http-client-node-manager-146" daemon [_thread_blocked, id=89347, stack(0x0000700011b78000,0x0000700011c78000)] + 0x00007fd975aaa800 JavaThread "http-client-node-manager-145" daemon [_thread_blocked, id=88835, stack(0x0000700011a75000,0x0000700011b75000)] + 0x00007fd979711800 JavaThread "http-client-node-manager-144" daemon [_thread_blocked, id=130307, stack(0x0000700011972000,0x0000700011a72000)] + 0x00007fd978802800 JavaThread "http-client-node-manager-143" daemon [_thread_in_native, id=88323, stack(0x000070001186f000,0x000070001196f000)] + 0x00007fd978802000 JavaThread "http-client-failure-detector-scheduler" daemon [_thread_blocked, id=87811, 
stack(0x000070001176c000,0x000070001186c000)] + 0x00007fd975aaa000 JavaThread "http-client-failure-detector-141" daemon [_thread_blocked, id=87555, stack(0x0000700011669000,0x0000700011769000)] + 0x00007fd97b1be800 JavaThread "http-client-failure-detector-140" daemon [_thread_blocked, id=66051, stack(0x0000700011566000,0x0000700011666000)] + 0x00007fd97acc2000 JavaThread "http-client-failure-detector-139" daemon [_thread_blocked, id=65795, stack(0x0000700011463000,0x0000700011563000)] + 0x00007fd97b1bd800 JavaThread "http-client-failure-detector-138" daemon [_thread_blocked, id=66563, stack(0x0000700011360000,0x0000700011460000)] + 0x00007fd97b244800 JavaThread "http-client-failure-detector-137" daemon [_thread_blocked, id=67075, stack(0x000070001125d000,0x000070001135d000)] + 0x00007fd9796fd000 JavaThread "http-client-failure-detector-136" daemon [_thread_blocked, id=65027, stack(0x000070001115a000,0x000070001125a000)] + 0x00007fd9796fc000 JavaThread "http-client-failure-detector-135" daemon [_thread_in_native, id=64771, stack(0x0000700011057000,0x0000700011157000)] + 0x00007fd9796fb800 JavaThread "http-client-failure-detector-134" daemon [_thread_in_native, id=64259, stack(0x0000700010f54000,0x0000700011054000)] + 0x00007fd97a348800 JavaThread "http-client-memoryManager-scheduler" daemon [_thread_blocked, id=67587, stack(0x0000700010e51000,0x0000700010f51000)] + 0x00007fd9796fa800 JavaThread "http-client-memoryManager-132" daemon [_thread_blocked, id=63491, stack(0x0000700010d4e000,0x0000700010e4e000)] + 0x00007fd975aa9000 JavaThread "http-client-memoryManager-131" daemon [_thread_blocked, id=67843, stack(0x0000700010c4b000,0x0000700010d4b000)] + 0x00007fd975aa8800 JavaThread "http-client-memoryManager-130" daemon [_thread_in_native, id=62979, stack(0x0000700010b48000,0x0000700010c48000)] + 0x00007fd97a346800 JavaThread "http-client-memoryManager-129" daemon [_thread_blocked, id=68355, stack(0x0000700010a45000,0x0000700010b45000)] + 0x00007fd97b06b800 JavaThread 
"http-client-memoryManager-128" daemon [_thread_blocked, id=62211, stack(0x0000700010942000,0x0000700010a42000)] + 0x00007fd979400800 JavaThread "http-client-memoryManager-127" daemon [_thread_blocked, id=68611, stack(0x000070001083f000,0x000070001093f000)] + 0x00007fd979400000 JavaThread "http-client-memoryManager-126" daemon [_thread_blocked, id=61443, stack(0x000070001073c000,0x000070001083c000)] + 0x00007fd97acb0800 JavaThread "http-client-memoryManager-125" daemon [_thread_in_native, id=69123, stack(0x0000700010639000,0x0000700010739000)] + 0x00007fd976649000 JavaThread "transaction-idle-check" daemon [_thread_blocked, id=60931, stack(0x0000700010536000,0x0000700010636000)] + 0x00007fd976152800 JavaThread "Announcer-1" daemon [_thread_blocked, id=69635, stack(0x0000700010433000,0x0000700010533000)] + 0x00007fd977d5a000 JavaThread "Announcer-0" daemon [_thread_blocked, id=60419, stack(0x0000700010330000,0x0000700010430000)] + 0x00007fd9796dc000 JavaThread "VM Pause Meter" daemon [_thread_blocked, id=59907, stack(0x000070001022d000,0x000070001032d000)] + 0x00007fd97b099000 JavaThread "http-worker-119" [_thread_in_native, id=69891, stack(0x000070001012a000,0x000070001022a000)] + 0x00007fd97761d000 JavaThread "Scheduler-510382691" [_thread_blocked, id=70915, stack(0x000070000fe21000,0x000070000ff21000)] + 0x00007fd97761c000 JavaThread "http-worker-115" [_thread_blocked, id=71171, stack(0x000070000fd1e000,0x000070000fe1e000)] + 0x00007fd97b09c800 JavaThread "http-worker-114" [_thread_blocked, id=58627, stack(0x000070000fc1b000,0x000070000fd1b000)] + 0x00007fd976632800 JavaThread "http-worker-113-acceptor-0@2e6b9d43-http@7d685f4e{HTTP/1.1,[http/1.1, h2c]}{127.0.0.1:65287}" [_thread_in_native, id=71939, stack(0x000070000fb18000,0x000070000fc18000)] + 0x00007fd9798dd800 JavaThread "http-worker-112" [_thread_in_native, id=72195, stack(0x000070000fa15000,0x000070000fb15000)] + 0x00007fd979a30800 JavaThread "query-management-4" [_thread_blocked, id=72963, 
stack(0x000070000f80f000,0x000070000f90f000)] + 0x00007fd975e93800 JavaThread "http-worker-109" [_thread_blocked, id=57603, stack(0x000070000f70c000,0x000070000f80c000)] + 0x00007fd97772b000 JavaThread "http-worker-108" [_thread_in_native, id=73219, stack(0x000070000f609000,0x000070000f709000)] + 0x00007fd97a58a800 JavaThread "query-management-3" [_thread_blocked, id=73731, stack(0x000070000f506000,0x000070000f606000)] + 0x00007fd97765b000 JavaThread "query-management-2" [_thread_blocked, id=56835, stack(0x000070000f403000,0x000070000f503000)] + 0x00007fd97637e000 JavaThread "task-management-4" [_thread_blocked, id=74243, stack(0x000070000f300000,0x000070000f400000)] + 0x00007fd97b4e7000 JavaThread "task-management-3" [_thread_blocked, id=74499, stack(0x000070000f1fd000,0x000070000f2fd000)] + 0x00007fd97a58a000 JavaThread "task-management-2" [_thread_blocked, id=56323, stack(0x000070000f0fa000,0x000070000f1fa000)] + 0x00007fd976369800 JavaThread "task-management-1" [_thread_blocked, id=75011, stack(0x000070000eff7000,0x000070000f0f7000)] + 0x00007fd9797ee800 JavaThread "task-management-0" [_thread_blocked, id=55811, stack(0x000070000eef4000,0x000070000eff4000)] + 0x00007fd97aca5800 JavaThread "SplitRunner-11-100" [_thread_blocked, id=75523, stack(0x000070000edf1000,0x000070000eef1000)] + 0x00007fd975e97800 JavaThread "SplitRunner-10-99" [_thread_blocked, id=75779, stack(0x000070000ecee000,0x000070000edee000)] + 0x00007fd97765a800 JavaThread "SplitRunner-9-98" [_thread_blocked, id=76035, stack(0x000070000ebeb000,0x000070000eceb000)] + 0x00007fd975e96800 JavaThread "SplitRunner-8-97" [_thread_blocked, id=76291, stack(0x000070000eae8000,0x000070000ebe8000)] + 0x00007fd97a594800 JavaThread "http-client-workerInfo-scheduler" daemon [_thread_blocked, id=54531, stack(0x000070000e9e5000,0x000070000eae5000)] + 0x00007fd976369000 JavaThread "http-client-workerInfo-95" daemon [_thread_blocked, id=54019, stack(0x000070000e8e2000,0x000070000e9e2000)] + 0x00007fd977bfb800 
JavaThread "http-client-workerInfo-94" daemon [_thread_blocked, id=53763, stack(0x000070000e7df000,0x000070000e8df000)] + 0x00007fd97990e000 JavaThread "http-client-workerInfo-93" daemon [_thread_blocked, id=53251, stack(0x000070000e6dc000,0x000070000e7dc000)] + 0x00007fd975e96000 JavaThread "http-client-workerInfo-92" daemon [_thread_blocked, id=77059, stack(0x000070000e5d9000,0x000070000e6d9000)] + 0x00007fd975e95000 JavaThread "http-client-workerInfo-91" daemon [_thread_blocked, id=52483, stack(0x000070000e4d6000,0x000070000e5d6000)] + 0x00007fd9797ee000 JavaThread "http-client-workerInfo-90" daemon [_thread_blocked, id=52227, stack(0x000070000e3d3000,0x000070000e4d3000)] + 0x00007fd9797ed000 JavaThread "http-client-workerInfo-89" daemon [_thread_in_native, id=77571, stack(0x000070000e2d0000,0x000070000e3d0000)] + 0x00007fd97b55d800 JavaThread "http-client-workerInfo-88" daemon [_thread_in_native, id=51715, stack(0x000070000e1cd000,0x000070000e2cd000)] + 0x00007fd97a585800 JavaThread "query-purger" [_thread_blocked, id=78083, stack(0x000070000e0ca000,0x000070000e1ca000)] + 0x00007fd9797ec800 JavaThread "http-client-exchange-scheduler" daemon [_thread_blocked, id=50947, stack(0x000070000dfc7000,0x000070000e0c7000)] + 0x00007fd975e94800 JavaThread "http-client-exchange-85" daemon [_thread_blocked, id=78339, stack(0x000070000dec4000,0x000070000dfc4000)] + 0x00007fd9797cf800 JavaThread "http-client-exchange-84" daemon [_thread_blocked, id=78851, stack(0x000070000ddc1000,0x000070000dec1000)] + 0x00007fd97a583000 JavaThread "http-client-exchange-83" daemon [_thread_blocked, id=50435, stack(0x000070000dcbe000,0x000070000ddbe000)] + 0x00007fd97ac6e000 JavaThread "http-client-exchange-82" daemon [_thread_blocked, id=79619, stack(0x000070000dbbb000,0x000070000dcbb000)] + 0x00007fd9761d4800 JavaThread "http-client-exchange-81" daemon [_thread_blocked, id=80131, stack(0x000070000dab8000,0x000070000dbb8000)] + 0x00007fd9761d3800 JavaThread "http-client-exchange-80" daemon 
[_thread_blocked, id=49923, stack(0x000070000d9b5000,0x000070000dab5000)] + 0x00007fd978abb000 JavaThread "http-client-exchange-79" daemon [_thread_in_native, id=49667, stack(0x000070000d8b2000,0x000070000d9b2000)] + 0x00007fd97b55b000 JavaThread "http-client-exchange-78" daemon [_thread_in_native, id=80643, stack(0x000070000d7af000,0x000070000d8af000)] + 0x00007fd975e8b000 JavaThread "query-management-1" [_thread_blocked, id=48899, stack(0x000070000d6ac000,0x000070000d7ac000)] + 0x00007fd977659800 JavaThread "query-management-0" [_thread_blocked, id=81155, stack(0x000070000d5a9000,0x000070000d6a9000)] + 0x00007fd97b55a000 JavaThread "http-client-scheduler-scheduler" daemon [_thread_blocked, id=48643, stack(0x000070000d4a6000,0x000070000d5a6000)] + 0x00007fd978aba000 JavaThread "http-client-scheduler-74" daemon [_thread_blocked, id=81923, stack(0x000070000d3a3000,0x000070000d4a3000)] + 0x00007fd97b532800 JavaThread "http-client-scheduler-73" daemon [_thread_blocked, id=82179, stack(0x000070000d2a0000,0x000070000d3a0000)] + 0x00007fd97b532000 JavaThread "http-client-scheduler-72" daemon [_thread_blocked, id=82691, stack(0x000070000d19d000,0x000070000d29d000)] + 0x00007fd97b531000 JavaThread "http-client-scheduler-71" daemon [_thread_blocked, id=83203, stack(0x000070000d09a000,0x000070000d19a000)] + 0x00007fd97b530800 JavaThread "http-client-scheduler-70" daemon [_thread_blocked, id=83459, stack(0x000070000cf97000,0x000070000d097000)] + 0x00007fd97ac6d800 JavaThread "http-client-scheduler-69" daemon [_thread_blocked, id=83971, stack(0x000070000ce94000,0x000070000cf94000)] + 0x00007fd97abe6800 JavaThread "http-client-scheduler-68" daemon [_thread_in_native, id=47619, stack(0x000070000cd91000,0x000070000ce91000)] + 0x00007fd977660000 JavaThread "http-client-scheduler-67" daemon [_thread_in_native, id=84483, stack(0x000070000cc8e000,0x000070000cd8e000)] + 0x00007fd977657000 JavaThread "FinalizerService" daemon [_thread_blocked, id=84995, 
stack(0x000070000cb8b000,0x000070000cc8b000)] + 0x00007fd97a582000 JavaThread "node-monitor-0" [_thread_blocked, id=47107, stack(0x000070000ca88000,0x000070000cb88000)] + 0x00007fd97a580000 JavaThread "ResourceGroupManager" daemon [_thread_blocked, id=85507, stack(0x000070000c985000,0x000070000ca85000)] + 0x00007fd97a850000 JavaThread "node-state-poller-0" [_thread_blocked, id=46595, stack(0x000070000c882000,0x000070000c982000)] + 0x00007fd977656800 JavaThread "http-client-node-manager-scheduler" daemon [_thread_blocked, id=45827, stack(0x000070000c67c000,0x000070000c77c000)] + 0x00007fd97abcf000 JavaThread "http-client-node-manager-60" daemon [_thread_blocked, id=86019, stack(0x000070000c579000,0x000070000c679000)] + 0x00007fd977653800 JavaThread "http-client-node-manager-59" daemon [_thread_blocked, id=86275, stack(0x000070000c476000,0x000070000c576000)] + 0x00007fd977653000 JavaThread "http-client-node-manager-58" daemon [_thread_blocked, id=86531, stack(0x000070000c373000,0x000070000c473000)] + 0x00007fd975e8a000 JavaThread "http-client-node-manager-57" daemon [_thread_blocked, id=44547, stack(0x000070000c270000,0x000070000c370000)] + 0x00007fd97a2c0800 JavaThread "http-client-node-manager-56" daemon [_thread_in_native, id=86787, stack(0x000070000c16d000,0x000070000c26d000)] + 0x00007fd977bf9800 JavaThread "http-client-node-manager-55" daemon [_thread_blocked, id=43779, stack(0x000070000c06a000,0x000070000c16a000)] + 0x00007fd975e89800 JavaThread "http-client-node-manager-54" daemon [_thread_blocked, id=43523, stack(0x000070000bf67000,0x000070000c067000)] + 0x00007fd975e88800 JavaThread "http-client-node-manager-53" daemon [_thread_in_native, id=30979, stack(0x000070000be64000,0x000070000bf64000)] + 0x00007fd97a2bd800 JavaThread "http-client-failure-detector-scheduler" daemon [_thread_blocked, id=30723, stack(0x000070000bd61000,0x000070000be61000)] + 0x00007fd977650000 JavaThread "http-client-failure-detector-51" daemon [_thread_blocked, id=31747, 
stack(0x000070000bc5e000,0x000070000bd5e000)] + 0x00007fd97b107800 JavaThread "http-client-failure-detector-50" daemon [_thread_blocked, id=29955, stack(0x000070000bb5b000,0x000070000bc5b000)] + 0x00007fd97764f800 JavaThread "http-client-failure-detector-49" daemon [_thread_blocked, id=32259, stack(0x000070000ba58000,0x000070000bb58000)] + 0x00007fd97764e800 JavaThread "http-client-failure-detector-48" daemon [_thread_blocked, id=32515, stack(0x000070000b955000,0x000070000ba55000)] + 0x00007fd97764e000 JavaThread "http-client-failure-detector-47" daemon [_thread_blocked, id=33027, stack(0x000070000b852000,0x000070000b952000)] + 0x00007fd977bf8800 JavaThread "http-client-failure-detector-46" daemon [_thread_blocked, id=33283, stack(0x000070000b74f000,0x000070000b84f000)] + 0x00007fd977bf8000 JavaThread "http-client-failure-detector-45" daemon [_thread_in_native, id=28931, stack(0x000070000b64c000,0x000070000b74c000)] + 0x00007fd975e88000 JavaThread "http-client-failure-detector-44" daemon [_thread_in_native, id=33539, stack(0x000070000b549000,0x000070000b649000)] + 0x00007fd97a2bd000 JavaThread "http-client-memoryManager-scheduler" daemon [_thread_blocked, id=28163, stack(0x000070000b446000,0x000070000b546000)] + 0x00007fd97990f000 JavaThread "http-client-memoryManager-42" daemon [_thread_blocked, id=33795, stack(0x000070000b343000,0x000070000b443000)] + 0x00007fd97763e000 JavaThread "http-client-memoryManager-41" daemon [_thread_blocked, id=34307, stack(0x000070000b240000,0x000070000b340000)] + 0x00007fd9794c2800 JavaThread "http-client-memoryManager-40" daemon [_thread_blocked, id=27395, stack(0x000070000b13d000,0x000070000b23d000)] + 0x00007fd977e18800 JavaThread "http-client-memoryManager-39" daemon [_thread_blocked, id=27139, stack(0x000070000b03a000,0x000070000b13a000)] + 0x00007fd97a08f000 JavaThread "http-client-memoryManager-38" daemon [_thread_blocked, id=26883, stack(0x000070000af37000,0x000070000b037000)] + 0x00007fd97a2ad800 JavaThread 
"http-client-memoryManager-37" daemon [_thread_in_native, id=35075, stack(0x000070000ae34000,0x000070000af34000)] + 0x00007fd975e87000 JavaThread "http-client-memoryManager-36" daemon [_thread_blocked, id=26371, stack(0x000070000ad31000,0x000070000ae31000)] + 0x00007fd9747ec000 JavaThread "http-client-memoryManager-35" daemon [_thread_in_native, id=35587, stack(0x000070000ac2e000,0x000070000ad2e000)] + 0x00007fd97b102000 JavaThread "transaction-idle-check" daemon [_thread_blocked, id=36099, stack(0x000070000ab2b000,0x000070000ac2b000)] + 0x00007fd9758f4800 JavaThread "http-client-anonymous1-scheduler" daemon [_thread_blocked, id=36611, stack(0x000070000aa28000,0x000070000ab28000)] + 0x00007fd979921000 JavaThread "http-client-anonymous1-32" daemon [_thread_blocked, id=37123, stack(0x000070000a925000,0x000070000aa25000)] + 0x00007fd97b072000 JavaThread "http-client-anonymous1-31" daemon [_thread_blocked, id=37635, stack(0x000070000a822000,0x000070000a922000)] + 0x00007fd97b077000 JavaThread "http-client-anonymous1-30" daemon [_thread_blocked, id=38147, stack(0x000070000a71f000,0x000070000a81f000)] + 0x00007fd97a2ae800 JavaThread "http-client-anonymous1-29" daemon [_thread_blocked, id=38659, stack(0x000070000a61c000,0x000070000a71c000)] + 0x00007fd97a2b5000 JavaThread "http-client-anonymous1-28" daemon [_thread_blocked, id=25603, stack(0x000070000a519000,0x000070000a619000)] + 0x00007fd975b8a000 JavaThread "http-client-anonymous1-27" daemon [_thread_blocked, id=25347, stack(0x000070000a416000,0x000070000a516000)] + 0x00007fd9740cd800 JavaThread "http-client-anonymous1-26" daemon [_thread_in_native, id=25091, stack(0x000070000a313000,0x000070000a413000)] + 0x00007fd974649800 JavaThread "http-client-anonymous1-25" daemon [_thread_in_native, id=24835, stack(0x000070000a210000,0x000070000a310000)] + 0x00007fd97b263800 JavaThread "SplitRunner-7-24" [_thread_blocked, id=24323, stack(0x000070000a10d000,0x000070000a20d000)] + 0x00007fd97aad3000 JavaThread "SplitRunner-6-23" 
[_thread_blocked, id=39683, stack(0x000070000a00a000,0x000070000a10a000)] + 0x00007fd97667f800 JavaThread "SplitRunner-5-22" [_thread_blocked, id=39939, stack(0x0000700009f07000,0x000070000a007000)] + 0x00007fd97a2b1800 JavaThread "SplitRunner-4-21" [_thread_blocked, id=23299, stack(0x0000700009e04000,0x0000700009f04000)] + 0x00007fd975870800 JavaThread "SplitRunner-3-20" [_thread_blocked, id=40195, stack(0x0000700009d01000,0x0000700009e01000)] + 0x00007fd97b1db800 JavaThread "SplitRunner-2-19" [_thread_blocked, id=40451, stack(0x0000700009bfe000,0x0000700009cfe000)] + 0x00007fd9798a8800 JavaThread "SplitRunner-1-18" [_thread_blocked, id=40963, stack(0x0000700009afb000,0x0000700009bfb000)] + 0x00007fd979817800 JavaThread "SplitRunner-0-17" [_thread_blocked, id=22531, stack(0x00007000099f8000,0x0000700009af8000)] + 0x00007fd97732e800 JavaThread "local-query-runner-scheduler-0" daemon [_thread_blocked, id=41475, stack(0x00007000098f5000,0x00007000099f5000)] + 0x00007fd9790c6000 JavaThread "FinalizerService" daemon [_thread_blocked, id=2567, stack(0x00007000097f2000,0x00007000098f2000)] + 0x00007fd9780ab000 JavaThread "TestHangMonitor" daemon [_thread_blocked, id=42243, stack(0x00007000096ef000,0x00007000097ef000)] + 0x00007fd97408b000 JavaThread "process reaper" daemon [_thread_blocked, id=42755, stack(0x00007000096c4000,0x00007000096ec000)] + 0x00007fd976153000 JavaThread "surefire-forkedjvm-ping-30s" daemon [_thread_blocked, id=43267, stack(0x00007000095c1000,0x00007000096c1000)] + 0x00007fd976151000 JavaThread "surefire-forkedjvm-command-thread" daemon [_thread_in_native, id=21763, stack(0x00007000094be000,0x00007000095be000)] + 0x00007fd97482e000 JavaThread "Service Thread" daemon [_thread_blocked, id=15875, stack(0x00007000092b8000,0x00007000093b8000)] + 0x00007fd976800800 JavaThread "C1 CompilerThread3" daemon [_thread_blocked, id=15619, stack(0x00007000091b5000,0x00007000092b5000)] + 0x00007fd97481d800 JavaThread "C2 CompilerThread2" daemon [_thread_blocked, 
id=17155, stack(0x00007000090b2000,0x00007000091b2000)] + 0x00007fd97481a000 JavaThread "C2 CompilerThread1" daemon [_thread_blocked, id=14851, stack(0x0000700008faf000,0x00007000090af000)] + 0x00007fd97780e800 JavaThread "C2 CompilerThread0" daemon [_thread_blocked, id=14595, stack(0x0000700008eac000,0x0000700008fac000)] + 0x00007fd976029800 JavaThread "Signal Dispatcher" daemon [_thread_blocked, id=17667, stack(0x0000700008da9000,0x0000700008ea9000)] + 0x00007fd974020800 JavaThread "Finalizer" daemon [_thread_blocked, id=12547, stack(0x0000700008ca6000,0x0000700008da6000)] + 0x00007fd975803000 JavaThread "Reference Handler" daemon [_thread_blocked, id=12291, stack(0x0000700008ba3000,0x0000700008ca3000)] + 0x00007fd976005800 JavaThread "main" [_thread_blocked, id=9987, stack(0x0000700008185000,0x0000700008285000)] + +Other Threads: + 0x00007fd976014000 VMThread [stack: 0x0000700008aa0000,0x0000700008ba0000] [id=19203] + 0x00007fd974023800 WatcherThread [stack: 0x00007000093bb000,0x00007000094bb000] [id=16387] + +VM state:not at safepoint (normal execution) + +VM Mutex/Monitor currently owned by a thread: None + +heap address: 0x0000000740000000, size: 2048 MB, Compressed Oops mode: Zero based, Oop shift amount: 3 +Narrow klass base: 0x0000000000000000, Narrow klass shift: 3 +Compressed class space size: 1073741824 Address: 0x00000007c0000000 + +Heap: + PSYoungGen total 509952K, used 126420K [0x0000000795580000, 0x00000007c0000000, 0x00000007c0000000) + eden space 343552K, 35% used [0x0000000795580000,0x000000079cd7d2e0,0x00000007aa500000) + from space 166400K, 2% used [0x00000007b5d80000,0x00000007b60f8000,0x00000007c0000000) + to space 177664K, 0% used [0x00000007aa500000,0x00000007aa500000,0x00000007b5280000) + ParOldGen total 1398272K, used 1020755K [0x0000000740000000, 0x0000000795580000, 0x0000000795580000) + object space 1398272K, 73% used [0x0000000740000000,0x000000077e4d4f88,0x0000000795580000) + Metaspace used 83016K, capacity 88695K, committed 100440K, 
reserved 1136640K + class space used 9264K, capacity 10507K, committed 13184K, reserved 1048576K + +Card table byte_map: [0x000000010d95e000,0x000000010dd5f000] byte_map_base: 0x0000000109f5e000 + +Marking Bits: (ParMarkBitMap*) 0x000000010d29bfa0 + Begin Bits: [0x000000010e00a000, 0x000000011000a000) + End Bits: [0x000000011000a000, 0x000000011200a000) + +Polling page: 0x000000010b9ee000 + +CodeCache: size=245760Kb used=57315Kb max_used=57681Kb free=188444Kb + bounds [0x0000000114c6b000, 0x000000011852b000, 0x0000000123c6b000] + total_blobs=17832 nmethods=17012 adapters=733 + compilation: enabled + +Compilation events (10 events): +Event: 448.327 Thread 0x00007fd97481a000 23147 4 com.facebook.presto.sql.relational.FunctionResolution:: (18 bytes) +Event: 448.328 Thread 0x00007fd97481a000 nmethod 23147 0x0000000116686990 code [0x0000000116686ae0, 0x0000000116686b78] +Event: 448.410 Thread 0x00007fd976800800 23148 3 sun.nio.cs.UTF_8$Encoder::encodeArrayLoop (489 bytes) +Event: 448.413 Thread 0x00007fd976800800 nmethod 23148 0x0000000115229490 code [0x00000001152299e0, 0x000000011522cfe8] +Event: 448.574 Thread 0x00007fd97780e800 23149 ! 
4 com.sun.proxy.$Proxy44::annotationType (29 bytes) +Event: 448.576 Thread 0x00007fd97780e800 nmethod 23149 0x0000000117bd9c90 code [0x0000000117bd9e00, 0x0000000117bd9f68] +Event: 448.807 Thread 0x00007fd97481d800 23150 4 com.google.common.cache.LocalCache$LoadingValueReference::get (10 bytes) +Event: 448.807 Thread 0x00007fd97481d800 nmethod 23150 0x00000001158fd810 code [0x00000001158fd960, 0x00000001158fd9f8] +Event: 449.092 Thread 0x00007fd976800800 23151 3 com.facebook.presto.type.CharParametricType::createType (73 bytes) +Event: 449.093 Thread 0x00007fd976800800 nmethod 23151 0x000000011832b1d0 code [0x000000011832b420, 0x000000011832bff8] + +GC Heap History (10 events): +Event: 440.865 GC heap before +{Heap before GC invocations=297 (full 9): + PSYoungGen total 516608K, used 431121K [0x0000000795580000, 0x00000007c0000000, 0x00000007c0000000) + eden space 340480K, 100% used [0x0000000795580000,0x00000007aa200000,0x00000007aa200000) + from space 176128K, 51% used [0x00000007aa200000,0x00000007afa846c0,0x00000007b4e00000) + to space 170496K, 0% used [0x00000007b5980000,0x00000007b5980000,0x00000007c0000000) + ParOldGen total 1398272K, used 878604K [0x0000000740000000, 0x0000000795580000, 0x0000000795580000) + object space 1398272K, 62% used [0x0000000740000000,0x0000000775a03198,0x0000000795580000) + Metaspace used 82960K, capacity 88649K, committed 100440K, reserved 1136640K + class space used 9256K, capacity 10498K, committed 13184K, reserved 1048576K +Event: 441.123 GC heap after +Heap after GC invocations=297 (full 9): + PSYoungGen total 534016K, used 105031K [0x0000000795580000, 0x00000007c0000000, 0x00000007c0000000) + eden space 363520K, 0% used [0x0000000795580000,0x0000000795580000,0x00000007ab880000) + from space 170496K, 61% used [0x00000007b5980000,0x00000007bc011ef8,0x00000007c0000000) + to space 164864K, 0% used [0x00000007ab880000,0x00000007ab880000,0x00000007b5980000) + ParOldGen total 1398272K, used 972374K [0x0000000740000000, 
0x0000000795580000, 0x0000000795580000) + object space 1398272K, 69% used [0x0000000740000000,0x000000077b595888,0x0000000795580000) + Metaspace used 82960K, capacity 88649K, committed 100440K, reserved 1136640K + class space used 9256K, capacity 10498K, committed 13184K, reserved 1048576K +} +Event: 442.029 GC heap before +{Heap before GC invocations=298 (full 9): + PSYoungGen total 534016K, used 468551K [0x0000000795580000, 0x00000007c0000000, 0x00000007c0000000) + eden space 363520K, 100% used [0x0000000795580000,0x00000007ab880000,0x00000007ab880000) + from space 170496K, 61% used [0x00000007b5980000,0x00000007bc011ef8,0x00000007c0000000) + to space 164864K, 0% used [0x00000007ab880000,0x00000007ab880000,0x00000007b5980000) + ParOldGen total 1398272K, used 972374K [0x0000000740000000, 0x0000000795580000, 0x0000000795580000) + object space 1398272K, 69% used [0x0000000740000000,0x000000077b595888,0x0000000795580000) + Metaspace used 82960K, capacity 88649K, committed 100440K, reserved 1136640K + class space used 9256K, capacity 10498K, committed 13184K, reserved 1048576K +Event: 442.163 GC heap after +Heap after GC invocations=298 (full 9): + PSYoungGen total 528384K, used 115566K [0x0000000795580000, 0x00000007c0000000, 0x00000007c0000000) + eden space 363520K, 0% used [0x0000000795580000,0x0000000795580000,0x00000007ab880000) + from space 164864K, 70% used [0x00000007ab880000,0x00000007b295b970,0x00000007b5980000) + to space 164352K, 0% used [0x00000007b5f80000,0x00000007b5f80000,0x00000007c0000000) + ParOldGen total 1398272K, used 974102K [0x0000000740000000, 0x0000000795580000, 0x0000000795580000) + object space 1398272K, 69% used [0x0000000740000000,0x000000077b7458a8,0x0000000795580000) + Metaspace used 82960K, capacity 88649K, committed 100440K, reserved 1136640K + class space used 9256K, capacity 10498K, committed 13184K, reserved 1048576K +} +Event: 445.125 GC heap before +{Heap before GC invocations=299 (full 9): + PSYoungGen total 528384K, used 
479086K [0x0000000795580000, 0x00000007c0000000, 0x00000007c0000000) + eden space 363520K, 100% used [0x0000000795580000,0x00000007ab880000,0x00000007ab880000) + from space 164864K, 70% used [0x00000007ab880000,0x00000007b295b970,0x00000007b5980000) + to space 164352K, 0% used [0x00000007b5f80000,0x00000007b5f80000,0x00000007c0000000) + ParOldGen total 1398272K, used 974102K [0x0000000740000000, 0x0000000795580000, 0x0000000795580000) + object space 1398272K, 69% used [0x0000000740000000,0x000000077b7458a8,0x0000000795580000) + Metaspace used 82960K, capacity 88649K, committed 100440K, reserved 1136640K + class space used 9256K, capacity 10498K, committed 13184K, reserved 1048576K +Event: 445.305 GC heap after +Heap after GC invocations=299 (full 9): + PSYoungGen total 517120K, used 142544K [0x0000000795580000, 0x00000007c0000000, 0x00000007c0000000) + eden space 352768K, 0% used [0x0000000795580000,0x0000000795580000,0x00000007aae00000) + from space 164352K, 86% used [0x00000007b5f80000,0x00000007beab40b8,0x00000007c0000000) + to space 173056K, 0% used [0x00000007aae00000,0x00000007aae00000,0x00000007b5700000) + ParOldGen total 1398272K, used 1020499K [0x0000000740000000, 0x0000000795580000, 0x0000000795580000) + object space 1398272K, 72% used [0x0000000740000000,0x000000077e494f68,0x0000000795580000) + Metaspace used 82960K, capacity 88649K, committed 100440K, reserved 1136640K + class space used 9256K, capacity 10498K, committed 13184K, reserved 1048576K +} +Event: 448.158 GC heap before +{Heap before GC invocations=300 (full 9): + PSYoungGen total 517120K, used 495229K [0x0000000795580000, 0x00000007c0000000, 0x00000007c0000000) + eden space 352768K, 99% used [0x0000000795580000,0x00000007aadeb5f0,0x00000007aae00000) + from space 164352K, 86% used [0x00000007b5f80000,0x00000007beab40b8,0x00000007c0000000) + to space 173056K, 0% used [0x00000007aae00000,0x00000007aae00000,0x00000007b5700000) + ParOldGen total 1398272K, used 1020499K [0x0000000740000000, 
0x0000000795580000, 0x0000000795580000) + object space 1398272K, 72% used [0x0000000740000000,0x000000077e494f68,0x0000000795580000) + Metaspace used 82961K, capacity 88649K, committed 100440K, reserved 1136640K + class space used 9256K, capacity 10498K, committed 13184K, reserved 1048576K +Event: 448.191 GC heap after +Heap after GC invocations=300 (full 9): + PSYoungGen total 525824K, used 105465K [0x0000000795580000, 0x00000007c0000000, 0x00000007c0000000) + eden space 352768K, 0% used [0x0000000795580000,0x0000000795580000,0x00000007aae00000) + from space 173056K, 60% used [0x00000007aae00000,0x00000007b14fe4e8,0x00000007b5700000) + to space 166400K, 0% used [0x00000007b5d80000,0x00000007b5d80000,0x00000007c0000000) + ParOldGen total 1398272K, used 1020699K [0x0000000740000000, 0x0000000795580000, 0x0000000795580000) + object space 1398272K, 72% used [0x0000000740000000,0x000000077e4c6f88,0x0000000795580000) + Metaspace used 82961K, capacity 88649K, committed 100440K, reserved 1136640K + class space used 9256K, capacity 10498K, committed 13184K, reserved 1048576K +} +Event: 448.857 GC heap before +{Heap before GC invocations=301 (full 9): + PSYoungGen total 525824K, used 458233K [0x0000000795580000, 0x00000007c0000000, 0x00000007c0000000) + eden space 352768K, 100% used [0x0000000795580000,0x00000007aae00000,0x00000007aae00000) + from space 173056K, 60% used [0x00000007aae00000,0x00000007b14fe4e8,0x00000007b5700000) + to space 166400K, 0% used [0x00000007b5d80000,0x00000007b5d80000,0x00000007c0000000) + ParOldGen total 1398272K, used 1020699K [0x0000000740000000, 0x0000000795580000, 0x0000000795580000) + object space 1398272K, 72% used [0x0000000740000000,0x000000077e4c6f88,0x0000000795580000) + Metaspace used 82961K, capacity 88649K, committed 100440K, reserved 1136640K + class space used 9256K, capacity 10498K, committed 13184K, reserved 1048576K +Event: 448.863 GC heap after +Heap after GC invocations=301 (full 9): + PSYoungGen total 509952K, used 3552K 
[0x0000000795580000, 0x00000007c0000000, 0x00000007c0000000) + eden space 343552K, 0% used [0x0000000795580000,0x0000000795580000,0x00000007aa500000) + from space 166400K, 2% used [0x00000007b5d80000,0x00000007b60f8000,0x00000007c0000000) + to space 177664K, 0% used [0x00000007aa500000,0x00000007aa500000,0x00000007b5280000) + ParOldGen total 1398272K, used 1020755K [0x0000000740000000, 0x0000000795580000, 0x0000000795580000) + object space 1398272K, 73% used [0x0000000740000000,0x000000077e4d4f88,0x0000000795580000) + Metaspace used 82961K, capacity 88649K, committed 100440K, reserved 1136640K + class space used 9256K, capacity 10498K, committed 13184K, reserved 1048576K +} + +Deoptimization events (10 events): +Event: 429.646 Thread 0x00007fd978bc8800 Uncommon trap: reason=bimorphic action=maybe_recompile pc=0x0000000116642ebc method=com.facebook.presto.operator.WindowOperator.lambda$findGroupEnd$2(Lcom/facebook/presto/operator/PagesHashStrategy;Lcom/facebook/presto/spi/Page;Ljava/lang/Integer;Ljava/la +Event: 429.939 Thread 0x00007fd978bc8800 Uncommon trap: reason=bimorphic action=maybe_recompile pc=0x000000011821e704 method=com.facebook.presto.operator.PagesIndexOrdering.quickSort(Lcom/facebook/presto/operator/PagesIndex;II)V @ 216 +Event: 430.078 Thread 0x00007fd978bc8800 Uncommon trap: reason=unstable_if action=reinterpret pc=0x000000011829a3c0 method=com.facebook.presto.$gen.PagesIndexComparator_20191018_191723_1420.compareTo(Lcom/facebook/presto/operator/PagesIndex;II)I @ 92 +Event: 430.078 Thread 0x00007fd978bc8800 Uncommon trap: reason=bimorphic action=maybe_recompile pc=0x00000001182977e8 method=com.facebook.presto.operator.PagesIndexOrdering.quickSort(Lcom/facebook/presto/operator/PagesIndex;II)V @ 290 +Event: 430.366 Thread 0x00007fd978bc8800 Uncommon trap: reason=bimorphic action=maybe_recompile pc=0x000000011821e704 method=com.facebook.presto.operator.PagesIndexOrdering.quickSort(Lcom/facebook/presto/operator/PagesIndex;II)V @ 216 +Event: 430.427 
Thread 0x00007fd975c5a800 Uncommon trap: reason=bimorphic action=maybe_recompile pc=0x000000011821e704 method=com.facebook.presto.operator.PagesIndexOrdering.quickSort(Lcom/facebook/presto/operator/PagesIndex;II)V @ 216 +Event: 430.434 Thread 0x00007fd978bc8800 Uncommon trap: reason=bimorphic action=maybe_recompile pc=0x00000001182977e8 method=com.facebook.presto.operator.PagesIndexOrdering.quickSort(Lcom/facebook/presto/operator/PagesIndex;II)V @ 290 +Event: 430.536 Thread 0x00007fd975c5a800 Uncommon trap: reason=bimorphic action=maybe_recompile pc=0x00000001182977e8 method=com.facebook.presto.operator.PagesIndexOrdering.quickSort(Lcom/facebook/presto/operator/PagesIndex;II)V @ 290 +Event: 430.551 Thread 0x00007fd978bc8800 Uncommon trap: reason=bimorphic action=maybe_recompile pc=0x0000000118280e88 method=com.facebook.presto.operator.PagesIndexOrdering.quickSort(Lcom/facebook/presto/operator/PagesIndex;II)V @ 42 +Event: 430.551 Thread 0x00007fd978bc8800 Uncommon trap: reason=bimorphic action=maybe_recompile pc=0x0000000118280e88 method=com.facebook.presto.operator.PagesIndexOrdering.quickSort(Lcom/facebook/presto/operator/PagesIndex;II)V @ 42 + +Classes redefined (0 events): +No events + +Internal exceptions (10 events): +Event: 140.567 Thread 0x00007fd978bc8800 Implicit null exception at 0x00000001157e5dbd to 0x00000001157e5f41 +Event: 151.067 Thread 0x00007fd978bc8800 Implicit null exception at 0x0000000114ff8f6a to 0x0000000114ff9259 +Event: 162.307 Thread 0x00007fd975dd6800 Exception (0x000000079ba95410) thrown at [/Users/jenkins/workspace/build-scripts/jobs/jdk8u/jdk8u-mac-x64-hotspot/workspace/build/src/hotspot/src/share/vm/runtime/objectMonitor.cpp, line 1684] +Event: 162.325 Thread 0x00007fd977fcd000 Exception (0x000000079bfda158) thrown at [/Users/jenkins/workspace/build-scripts/jobs/jdk8u/jdk8u-mac-x64-hotspot/workspace/build/src/hotspot/src/share/vm/runtime/objectMonitor.cpp, line 1684] +Event: 179.666 Thread 0x00007fd9796dc000 Exception 
(0x0000000795a94990) thrown at [/Users/jenkins/workspace/build-scripts/jobs/jdk8u/jdk8u-mac-x64-hotspot/workspace/build/src/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp, line 368] +Event: 179.667 Thread 0x00007fd97a5ad800 Exception (0x0000000795d10588) thrown at [/Users/jenkins/workspace/build-scripts/jobs/jdk8u/jdk8u-mac-x64-hotspot/workspace/build/src/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp, line 368] +Event: 267.500 Thread 0x00007fd9796dc000 Exception (0x0000000795b6a9d0) thrown at [/Users/jenkins/workspace/build-scripts/jobs/jdk8u/jdk8u-mac-x64-hotspot/workspace/build/src/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp, line 368] +Event: 267.500 Thread 0x00007fd97a5ad800 Exception (0x0000000795b6cab8) thrown at [/Users/jenkins/workspace/build-scripts/jobs/jdk8u/jdk8u-mac-x64-hotspot/workspace/build/src/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp, line 368] +Event: 370.837 Thread 0x00007fd978bc8800 Implicit null exception at 0x0000000117f776a7 to 0x0000000117f777a9 +Event: 374.179 Thread 0x00007fd975c5a800 Implicit null exception at 0x00000001152b7a90 to 0x00000001152b7b39 + +Events (10 events): +Event: 449.142 Executing VM operation: RevokeBias +Event: 449.145 Executing VM operation: RevokeBias done +Event: 449.168 loading class io/airlift/slice/Slice +Event: 449.168 loading class io/airlift/slice/Slice done +Event: 449.169 loading class io/airlift/slice/Slice +Event: 449.169 loading class io/airlift/slice/Slice done +Event: 449.169 loading class io/airlift/slice/Slice +Event: 449.169 loading class io/airlift/slice/Slice done +Event: 449.169 loading class com/facebook/presto/spi/predicate/NullableValue +Event: 449.169 loading class com/facebook/presto/spi/predicate/NullableValue done + + +Dynamic libraries: +0x00007fff34a89000 /System/Library/Frameworks/Cocoa.framework/Versions/A/Cocoa +0x00007fff4082d000 /System/Library/Frameworks/Security.framework/Versions/A/Security +0x00007fff3371c000 
/System/Library/Frameworks/ApplicationServices.framework/Versions/A/ApplicationServices +0x00007fff60918000 /usr/lib/libz.1.dylib +0x00007fff5e272000 /usr/lib/libSystem.B.dylib +0x00007fff5faa9000 /usr/lib/libobjc.A.dylib +0x00007fff37580000 /System/Library/Frameworks/Foundation.framework/Versions/C/Foundation +0x00007fff352fe000 /System/Library/Frameworks/CoreFoundation.framework/Versions/A/CoreFoundation +0x00007fff32915000 /System/Library/Frameworks/AppKit.framework/Versions/C/AppKit +0x00007fff34e87000 /System/Library/Frameworks/CoreData.framework/Versions/A/CoreData +0x00007fff57d38000 /System/Library/PrivateFrameworks/RemoteViewServices.framework/Versions/A/RemoteViewServices +0x00007fff5bfaf000 /System/Library/PrivateFrameworks/UIFoundation.framework/Versions/A/UIFoundation +0x00007fff5d693000 /System/Library/PrivateFrameworks/XCTTargetBootstrap.framework/Versions/A/XCTTargetBootstrap +0x00007fff3520d000 /System/Library/Frameworks/CoreDisplay.framework/Versions/A/CoreDisplay +0x00007fff52a72000 /System/Library/PrivateFrameworks/IconServices.framework/Versions/A/IconServices +0x00007fff39ea1000 /System/Library/Frameworks/Metal.framework/Versions/A/Metal +0x00007fff49fb7000 /System/Library/PrivateFrameworks/DesktopServicesPriv.framework/Versions/A/DesktopServicesPriv +0x00007fff5ee0c000 /usr/lib/libenergytrace.dylib +0x00007fff5a496000 /System/Library/PrivateFrameworks/SkyLight.framework/Versions/A/SkyLight +0x00007fff35744000 /System/Library/Frameworks/CoreGraphics.framework/Versions/A/CoreGraphics +0x00007fff316e1000 /System/Library/Frameworks/Accelerate.framework/Versions/A/Accelerate +0x00007fff37cc9000 /System/Library/Frameworks/IOSurface.framework/Versions/A/IOSurface +0x00007fff49e23000 /System/Library/PrivateFrameworks/DFRFoundation.framework/Versions/A/DFRFoundation +0x00007fff33bff000 /System/Library/Frameworks/AudioToolbox.framework/Versions/A/AudioToolbox +0x00007fff33e78000 /System/Library/Frameworks/AudioUnit.framework/Versions/A/AudioUnit 
+0x00007fff49eb8000 /System/Library/PrivateFrameworks/DataDetectorsCore.framework/Versions/A/DataDetectorsCore +0x00007fff3458c000 /System/Library/Frameworks/Carbon.framework/Versions/A/Frameworks/HIToolbox.framework/Versions/A/HIToolbox +0x00007fff5ef65000 /usr/lib/libicucore.A.dylib +0x00007fff3fda0000 /System/Library/Frameworks/QuartzCore.framework/Versions/A/QuartzCore +0x00007fff34961000 /System/Library/Frameworks/Carbon.framework/Versions/A/Frameworks/SpeechRecognition.framework/Versions/A/SpeechRecognition +0x00007fff5e3c6000 /usr/lib/libauto.dylib +0x00007fff6080c000 /usr/lib/libxml2.2.dylib +0x00007fff499fe000 /System/Library/PrivateFrameworks/CoreUI.framework/Versions/A/CoreUI +0x00007fff34d72000 /System/Library/Frameworks/CoreAudio.framework/Versions/A/CoreAudio +0x00007fff373b5000 /System/Library/Frameworks/DiskArbitration.framework/Versions/A/DiskArbitration +0x00007fff5f213000 /usr/lib/liblangid.dylib +0x00007fff53e7d000 /System/Library/PrivateFrameworks/MultitouchSupport.framework/Versions/A/MultitouchSupport +0x00007fff37c38000 /System/Library/Frameworks/IOKit.framework/Versions/A/IOKit +0x00007fff5de32000 /usr/lib/libDiagnosticMessagesClient.dylib +0x00007fff36553000 /System/Library/Frameworks/CoreServices.framework/Versions/A/CoreServices +0x00007fff55b37000 /System/Library/PrivateFrameworks/PerformanceAnalysis.framework/Versions/A/PerformanceAnalysis +0x00007fff3ef8e000 /System/Library/Frameworks/OpenGL.framework/Versions/A/OpenGL +0x00007fff34a97000 /System/Library/Frameworks/ColorSync.framework/Versions/A/ColorSync +0x00007fff35dd5000 /System/Library/Frameworks/CoreImage.framework/Versions/A/CoreImage +0x00007fff36f2b000 /System/Library/Frameworks/CoreText.framework/Versions/A/CoreText +0x00007fff37d2c000 /System/Library/Frameworks/ImageIO.framework/Versions/A/ImageIO +0x00007fff46050000 /System/Library/PrivateFrameworks/Backup.framework/Versions/A/Backup +0x00007fff5e319000 /usr/lib/libarchive.2.dylib +0x00007fff341d1000 
/System/Library/Frameworks/CFNetwork.framework/Versions/A/CFNetwork +0x00007fff40f89000 /System/Library/Frameworks/SystemConfiguration.framework/Versions/A/SystemConfiguration +0x00007fff5dd63000 /usr/lib/libCRFSuite.dylib +0x00007fff5e4b5000 /usr/lib/libc++.1.dylib +0x00007fff5e509000 /usr/lib/libc++abi.dylib +0x00007fff6110e000 /usr/lib/system/libcache.dylib +0x00007fff61113000 /usr/lib/system/libcommonCrypto.dylib +0x00007fff6111e000 /usr/lib/system/libcompiler_rt.dylib +0x00007fff61126000 /usr/lib/system/libcopyfile.dylib +0x00007fff61130000 /usr/lib/system/libcorecrypto.dylib +0x00007fff6123b000 /usr/lib/system/libdispatch.dylib +0x00007fff61275000 /usr/lib/system/libdyld.dylib +0x00007fff612a2000 /usr/lib/system/libkeymgr.dylib +0x00007fff612b0000 /usr/lib/system/liblaunch.dylib +0x00007fff612b1000 /usr/lib/system/libmacho.dylib +0x00007fff612b7000 /usr/lib/system/libquarantine.dylib +0x00007fff612ba000 /usr/lib/system/libremovefile.dylib +0x00007fff612bc000 /usr/lib/system/libsystem_asl.dylib +0x00007fff612d4000 /usr/lib/system/libsystem_blocks.dylib +0x00007fff612d5000 /usr/lib/system/libsystem_c.dylib +0x00007fff6135d000 /usr/lib/system/libsystem_configuration.dylib +0x00007fff61361000 /usr/lib/system/libsystem_coreservices.dylib +0x00007fff61365000 /usr/lib/system/libsystem_darwin.dylib +0x00007fff6136c000 /usr/lib/system/libsystem_dnssd.dylib +0x00007fff61373000 /usr/lib/system/libsystem_info.dylib +0x00007fff613e8000 /usr/lib/system/libsystem_m.dylib +0x00007fff61434000 /usr/lib/system/libsystem_malloc.dylib +0x00007fff6145f000 /usr/lib/system/libsystem_networkextension.dylib +0x00007fff6146a000 /usr/lib/system/libsystem_notify.dylib +0x00007fff61487000 /usr/lib/system/libsystem_sandbox.dylib +0x00007fff6148b000 /usr/lib/system/libsystem_secinit.dylib +0x00007fff613bf000 /usr/lib/system/libsystem_kernel.dylib +0x00007fff61472000 /usr/lib/system/libsystem_platform.dylib +0x00007fff6147c000 /usr/lib/system/libsystem_pthread.dylib +0x00007fff6148e000 
/usr/lib/system/libsystem_symptoms.dylib +0x00007fff61496000 /usr/lib/system/libsystem_trace.dylib +0x00007fff614ad000 /usr/lib/system/libunwind.dylib +0x00007fff614b3000 /usr/lib/system/libxpc.dylib +0x00007fff5e496000 /usr/lib/libbsm.0.dylib +0x00007fff612a3000 /usr/lib/system/libkxld.dylib +0x00007fff45441000 /System/Library/PrivateFrameworks/AppleFSCompression.framework/Versions/A/AppleFSCompression +0x00007fff5e132000 /usr/lib/libOpenScriptingUtil.dylib +0x00007fff5e7be000 /usr/lib/libcoretls.dylib +0x00007fff5e7d5000 /usr/lib/libcoretls_cfhelpers.dylib +0x00007fff60240000 /usr/lib/libpam.2.dylib +0x00007fff60400000 /usr/lib/libsqlite3.dylib +0x00007fff607fa000 /usr/lib/libxar.1.dylib +0x00007fff5e4a7000 /usr/lib/libbz2.1.0.dylib +0x00007fff5f215000 /usr/lib/liblzma.5.dylib +0x00007fff5f6c6000 /usr/lib/libnetwork.dylib +0x00007fff5e302000 /usr/lib/libapple_nghttp2.dylib +0x00007fff60247000 /usr/lib/libpcap.A.dylib +0x00007fff368f2000 /System/Library/Frameworks/CoreServices.framework/Versions/A/Frameworks/FSEvents.framework/Versions/A/FSEvents +0x00007fff365d1000 /System/Library/Frameworks/CoreServices.framework/Versions/A/Frameworks/CarbonCore.framework/Versions/A/CarbonCore +0x00007fff36aad000 /System/Library/Frameworks/CoreServices.framework/Versions/A/Frameworks/Metadata.framework/Versions/A/Metadata +0x00007fff36b4c000 /System/Library/Frameworks/CoreServices.framework/Versions/A/Frameworks/OSServices.framework/Versions/A/OSServices +0x00007fff36b97000 /System/Library/Frameworks/CoreServices.framework/Versions/A/Frameworks/SearchKit.framework/Versions/A/SearchKit +0x00007fff36554000 /System/Library/Frameworks/CoreServices.framework/Versions/A/Frameworks/AE.framework/Versions/A/AE +0x00007fff368fb000 /System/Library/Frameworks/CoreServices.framework/Versions/A/Frameworks/LaunchServices.framework/Versions/A/LaunchServices +0x00007fff368a9000 
/System/Library/Frameworks/CoreServices.framework/Versions/A/Frameworks/DictionaryServices.framework/Versions/A/DictionaryServices +0x00007fff36bff000 /System/Library/Frameworks/CoreServices.framework/Versions/A/Frameworks/SharedFileList.framework/Versions/A/SharedFileList +0x00007fff3af69000 /System/Library/Frameworks/NetFS.framework/Versions/A/NetFS +0x00007fff540e3000 /System/Library/PrivateFrameworks/NetAuth.framework/Versions/A/NetAuth +0x00007fff5da97000 /System/Library/PrivateFrameworks/login.framework/Versions/A/Frameworks/loginsupport.framework/Versions/A/loginsupport +0x00007fff5bbb8000 /System/Library/PrivateFrameworks/TCC.framework/Versions/A/TCC +0x00007fff4905b000 /System/Library/PrivateFrameworks/CoreNLP.framework/Versions/A/CoreNLP +0x00007fff53b78000 /System/Library/PrivateFrameworks/MetadataUtilities.framework/Versions/A/MetadataUtilities +0x00007fff5f2ea000 /usr/lib/libmecabra.dylib +0x00007fff3371d000 /System/Library/Frameworks/ApplicationServices.framework/Versions/A/Frameworks/ATS.framework/Versions/A/ATS +0x00007fff33a6c000 /System/Library/Frameworks/ApplicationServices.framework/Versions/A/Frameworks/ColorSyncLegacy.framework/Versions/A/ColorSyncLegacy +0x00007fff33b0b000 /System/Library/Frameworks/ApplicationServices.framework/Versions/A/Frameworks/HIServices.framework/Versions/A/HIServices +0x00007fff33b5e000 /System/Library/Frameworks/ApplicationServices.framework/Versions/A/Frameworks/LangAnalysis.framework/Versions/A/LangAnalysis +0x00007fff33b6e000 /System/Library/Frameworks/ApplicationServices.framework/Versions/A/Frameworks/PrintCore.framework/Versions/A/PrintCore +0x00007fff33bb8000 /System/Library/Frameworks/ApplicationServices.framework/Versions/A/Frameworks/QD.framework/Versions/A/QD +0x00007fff33bf2000 /System/Library/Frameworks/ApplicationServices.framework/Versions/A/Frameworks/SpeechSynthesis.framework/Versions/A/SpeechSynthesis +0x00007fff5e531000 /usr/lib/libcompression.dylib +0x00007fff3a171000 
/System/Library/Frameworks/MetalPerformanceShaders.framework/Versions/A/MetalPerformanceShaders +0x00007fff316f9000 /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vImage.framework/Versions/A/vImage +0x00007fff32773000 /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/vecLib +0x00007fff326bf000 /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libvMisc.dylib +0x00007fff324d7000 /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libvDSP.dylib +0x00007fff31d93000 /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib +0x00007fff32080000 /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libLAPACK.dylib +0x00007fff3242a000 /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libLinearAlgebra.dylib +0x00007fff324c3000 /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libSparseBLAS.dylib +0x00007fff32440000 /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libQuadrature.dylib +0x00007fff3200d000 /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBNNS.dylib +0x00007fff32446000 /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libSparse.dylib +0x00007fff503a9000 /System/Library/PrivateFrameworks/GPUWrangler.framework/Versions/A/GPUWrangler +0x00007fff526a7000 /System/Library/PrivateFrameworks/IOAccelerator.framework/Versions/A/IOAccelerator +0x00007fff526b2000 /System/Library/PrivateFrameworks/IOPresentment.framework/Versions/A/IOPresentment +0x00007fff49e33000 /System/Library/PrivateFrameworks/DSExternalDisplay.framework/Versions/A/DSExternalDisplay +0x00007fff3e3e5000 
/System/Library/Frameworks/OpenGL.framework/Versions/A/Libraries/libCoreFSCache.dylib +0x00007fff39f65000 /System/Library/Frameworks/MetalPerformanceShaders.framework/Frameworks/MPSCore.framework/Versions/A/MPSCore +0x00007fff39f85000 /System/Library/Frameworks/MetalPerformanceShaders.framework/Frameworks/MPSImage.framework/Versions/A/MPSImage +0x00007fff3a02a000 /System/Library/Frameworks/MetalPerformanceShaders.framework/Frameworks/MPSNeuralNetwork.framework/Versions/A/MPSNeuralNetwork +0x00007fff3a002000 /System/Library/Frameworks/MetalPerformanceShaders.framework/Frameworks/MPSMatrix.framework/Versions/A/MPSMatrix +0x00007fff3a156000 /System/Library/Frameworks/MetalPerformanceShaders.framework/Frameworks/MPSRayIntersector.framework/Versions/A/MPSRayIntersector +0x00007fff53ba1000 /System/Library/PrivateFrameworks/MetalTools.framework/Versions/A/MetalTools +0x00007fff44ceb000 /System/Library/PrivateFrameworks/AggregateDictionary.framework/Versions/A/AggregateDictionary +0x00007fff5e112000 /usr/lib/libMobileGestalt.dylib +0x00007fff3708e000 /System/Library/Frameworks/CoreVideo.framework/Versions/A/CoreVideo +0x00007fff511ba000 /System/Library/PrivateFrameworks/GraphVisualizer.framework/Versions/A/GraphVisualizer +0x00007fff4b039000 /System/Library/PrivateFrameworks/FaceCore.framework/Versions/A/FaceCore +0x00007fff3da13000 /System/Library/Frameworks/OpenCL.framework/Versions/A/OpenCL +0x00007fff5de6a000 /usr/lib/libFosl_dynamic.dylib +0x00007fff5494e000 /System/Library/PrivateFrameworks/OTSVG.framework/Versions/A/OTSVG +0x00007fff33821000 /System/Library/Frameworks/ApplicationServices.framework/Versions/A/Frameworks/ATS.framework/Versions/A/Resources/libFontParser.dylib +0x00007fff33939000 /System/Library/Frameworks/ApplicationServices.framework/Versions/A/Frameworks/ATS.framework/Versions/A/Resources/libFontRegistry.dylib +0x00007fff37f9a000 /System/Library/Frameworks/ImageIO.framework/Versions/A/Resources/libJPEG.dylib +0x00007fff382ac000 
/System/Library/Frameworks/ImageIO.framework/Versions/A/Resources/libTIFF.dylib +0x00007fff38282000 /System/Library/Frameworks/ImageIO.framework/Versions/A/Resources/libPng.dylib +0x00007fff37eb8000 /System/Library/Frameworks/ImageIO.framework/Versions/A/Resources/libGIF.dylib +0x00007fff37ebd000 /System/Library/Frameworks/ImageIO.framework/Versions/A/Resources/libJP2.dylib +0x00007fff382a9000 /System/Library/Frameworks/ImageIO.framework/Versions/A/Resources/libRadiance.dylib +0x00007fff45598000 /System/Library/PrivateFrameworks/AppleJPEG.framework/Versions/A/AppleJPEG +0x00007fff3e3f0000 /System/Library/Frameworks/OpenGL.framework/Versions/A/Libraries/libGFXShared.dylib +0x00007fff3e5b3000 /System/Library/Frameworks/OpenGL.framework/Versions/A/Libraries/libGLU.dylib +0x00007fff3e3f9000 /System/Library/Frameworks/OpenGL.framework/Versions/A/Libraries/libGL.dylib +0x00007fff3e405000 /System/Library/Frameworks/OpenGL.framework/Versions/A/Libraries/libGLImage.dylib +0x00007fff3e3e2000 /System/Library/Frameworks/OpenGL.framework/Versions/A/Libraries/libCVMSPluginSupport.dylib +0x00007fff3e3eb000 /System/Library/Frameworks/OpenGL.framework/Versions/A/Libraries/libCoreVMClient.dylib +0x00007fff5ec82000 /usr/lib/libcups.2.dylib +0x00007fff3946c000 /System/Library/Frameworks/Kerberos.framework/Versions/A/Kerberos +0x00007fff3799c000 /System/Library/Frameworks/GSS.framework/Versions/A/GSS +0x00007fff60395000 /usr/lib/libresolv.9.dylib +0x00007fff5ee74000 /usr/lib/libiconv.2.dylib +0x00007fff5132f000 /System/Library/PrivateFrameworks/Heimdal.framework/Versions/A/Heimdal +0x00007fff5ee44000 /usr/lib/libheimdal-asn1.dylib +0x00007fff3da87000 /System/Library/Frameworks/OpenDirectory.framework/Versions/A/OpenDirectory +0x00007fff47dbe000 /System/Library/PrivateFrameworks/CommonAuth.framework/Versions/A/CommonAuth +0x00007fff3da6b000 /System/Library/Frameworks/OpenDirectory.framework/Versions/A/Frameworks/CFOpenDirectory.framework/Versions/A/CFOpenDirectory +0x00007fff40b2e000 
/System/Library/Frameworks/SecurityFoundation.framework/Versions/A/SecurityFoundation +0x00007fff44232000 /System/Library/PrivateFrameworks/APFS.framework/Versions/A/APFS +0x00007fff607f6000 /usr/lib/libutil.dylib +0x00007fff5e51f000 /usr/lib/libcharset.1.dylib +0x00007fff45835000 /System/Library/PrivateFrameworks/AppleSauce.framework/Versions/A/AppleSauce +0x00007fff459b6000 /System/Library/PrivateFrameworks/AssertionServices.framework/Versions/A/AssertionServices +0x00007fff460e6000 /System/Library/PrivateFrameworks/BaseBoard.framework/Versions/A/BaseBoard +0x00007fff5f245000 /usr/lib/libmecab.1.0.0.dylib +0x00007fff5ee3e000 /usr/lib/libgermantok.dylib +0x00007fff5e2ef000 /usr/lib/libThaiTokenizer.dylib +0x00007fff5dd9a000 /usr/lib/libChineseTokenizer.dylib +0x00007fff52d5f000 /System/Library/PrivateFrameworks/LanguageModeling.framework/Versions/A/LanguageModeling +0x00007fff48aa0000 /System/Library/PrivateFrameworks/CoreEmoji.framework/Versions/A/CoreEmoji +0x00007fff52e3b000 /System/Library/PrivateFrameworks/Lexicon.framework/Versions/A/Lexicon +0x00007fff52e7e000 /System/Library/PrivateFrameworks/LinguisticData.framework/Versions/A/LinguisticData +0x00007fff5e520000 /usr/lib/libcmph.dylib +0x00007fff40bec000 /System/Library/Frameworks/ServiceManagement.framework/Versions/A/ServiceManagement +0x00007fff46047000 /System/Library/PrivateFrameworks/BackgroundTaskManagement.framework/Versions/A/BackgroundTaskManagement +0x00007fff608ef000 /usr/lib/libxslt.1.dylib +0x00007fff3488e000 /System/Library/Frameworks/Carbon.framework/Versions/A/Frameworks/Ink.framework/Versions/A/Ink +0x00007fff5be2a000 /System/Library/PrivateFrameworks/TextureIO.framework/Versions/A/TextureIO +0x00007fff5e343000 /usr/lib/libate.dylib +0x00007fff49d82000 /System/Library/PrivateFrameworks/CrashReporterSupport.framework/Versions/A/CrashReporterSupport +0x00007fff59564000 /System/Library/PrivateFrameworks/Sharing.framework/Versions/A/Sharing +0x00007fff45d99000 
/System/Library/PrivateFrameworks/AuthKit.framework/Versions/A/AuthKit +0x00007fff452ed000 /System/Library/PrivateFrameworks/Apple80211.framework/Versions/A/Apple80211 +0x00007fff49b2a000 /System/Library/PrivateFrameworks/CoreUtils.framework/Versions/A/CoreUtils +0x00007fff370cf000 /System/Library/Frameworks/CoreWLAN.framework/Versions/A/CoreWLAN +0x00007fff37acb000 /System/Library/Frameworks/IOBluetooth.framework/Versions/A/IOBluetooth +0x00007fff579d8000 /System/Library/PrivateFrameworks/ProtocolBuffer.framework/Versions/A/ProtocolBuffer +0x00007fff53dd9000 /System/Library/PrivateFrameworks/MobileKeyBag.framework/Versions/A/MobileKeyBag +0x00007fff4936e000 /System/Library/PrivateFrameworks/CorePhoneNumbers.framework/Versions/A/CorePhoneNumbers +0x00007fff4554c000 /System/Library/PrivateFrameworks/AppleIDAuthSupport.framework/Versions/A/AppleIDAuthSupport +0x00007fff52d32000 /System/Library/PrivateFrameworks/KeychainCircle.framework/Versions/A/KeychainCircle +0x00007fff57b8e000 /System/Library/PrivateFrameworks/ROCKit.framework/Versions/A/ROCKit +0x00007fff49d1e000 /System/Library/PrivateFrameworks/CoreWiFi.framework/Versions/A/CoreWiFi +0x00007fff34e5c000 /System/Library/Frameworks/CoreBluetooth.framework/Versions/A/CoreBluetooth +0x00007fff49f79000 /System/Library/PrivateFrameworks/DebugSymbols.framework/Versions/A/DebugSymbols +0x00007fff498ea000 /System/Library/PrivateFrameworks/CoreSymbolication.framework/Versions/A/CoreSymbolication +0x00007fff5b645000 /System/Library/PrivateFrameworks/Symbolication.framework/Versions/A/Symbolication +0x00007fff5aee8000 /System/Library/PrivateFrameworks/SpeechRecognitionCore.framework/Versions/A/SpeechRecognitionCore +0x000000010ca00000 /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/server/libjvm.dylib +0x00007fff606d1000 /usr/lib/libstdc++.6.dylib +0x000000010b9df000 /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/libverify.dylib +0x000000010d51a000 
/Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/libjava.dylib +0x000000010d592000 /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/libzip.dylib +0x0000000123cb4000 /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/libmanagement.dylib +0x0000000123c95000 /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/libnet.dylib +0x0000000114b86000 /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/libnio.dylib +0x00000001242d9000 /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre/lib/libsunec.dylib + +VM Arguments: +jvm_args: -Dfile.encoding=UTF-8 -Xmx2g -Xms2g -XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -XX:-OmitStackTraceInFastThrow +java_command: /Users/atavory/presto/presto-main/target/surefire/surefirebooter8296995797813530564.jar /Users/atavory/presto/presto-main/target/surefire 2019-10-18T22-10-13_380-jvmRun1 surefire6366065411750570270tmp surefire_07089425243565215865tmp +java_class_path (initial): /Users/atavory/presto/presto-main/target/surefire/surefirebooter8296995797813530564.jar +Launcher Type: SUN_STANDARD + +Environment Variables: +PATH=/opt/local/bin:/opt/local/sbin:/opt/facebook/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/Library/TeX/texbin:/opt/facebook/bin/biggrep:/opt/facebook/nuclide/latest/nuclide/pkg/fb-on-demand-cli/bin:/opt/facebook/ops/scripts/chef:/opt/homebrew/bin:/usr/local/munki:/opt/facebook/hg/bin +SHELL=/bin/bash + +Signal Handlers: +SIGSEGV: [libjvm.dylib+0x58618b], sa_mask[0]=11111111011111110111111111111111, sa_flags=SA_ONSTACK|SA_RESTART|SA_SIGINFO +SIGBUS: [libjvm.dylib+0x58618b], sa_mask[0]=11111111011111110111111111111111, sa_flags=SA_RESTART|SA_SIGINFO +SIGFPE: [libjvm.dylib+0x4614ec], sa_mask[0]=11111111011111110111111111111111, sa_flags=SA_RESTART|SA_SIGINFO +SIGPIPE: [libjvm.dylib+0x4614ec], sa_mask[0]=11111111011111110111111111111111, sa_flags=SA_RESTART|SA_SIGINFO 
+SIGXFSZ: [libjvm.dylib+0x4614ec], sa_mask[0]=11111111011111110111111111111111, sa_flags=SA_RESTART|SA_SIGINFO +SIGILL: [libjvm.dylib+0x4614ec], sa_mask[0]=11111111011111110111111111111111, sa_flags=SA_RESTART|SA_SIGINFO +SIGUSR1: SIG_DFL, sa_mask[0]=00000000000000000000000000000000, sa_flags=none +SIGUSR2: [libjvm.dylib+0x46100a], sa_mask[0]=00100000000000000000000000000000, sa_flags=SA_RESTART|SA_SIGINFO +SIGHUP: [libjvm.dylib+0x45f595], sa_mask[0]=11111111011111110111111111111111, sa_flags=SA_RESTART|SA_SIGINFO +SIGINT: [libjvm.dylib+0x45f595], sa_mask[0]=11111111011111110111111111111111, sa_flags=SA_RESTART|SA_SIGINFO +SIGTERM: [libjvm.dylib+0x45f595], sa_mask[0]=11111111011111110111111111111111, sa_flags=SA_RESTART|SA_SIGINFO +SIGQUIT: [libjvm.dylib+0x45f595], sa_mask[0]=11111111011111110111111111111111, sa_flags=SA_RESTART|SA_SIGINFO + + +--------------- S Y S T E M --------------- + +OS:Bsduname:Darwin 18.7.0 Darwin Kernel Version 18.7.0: Thu Jun 20 18:42:21 PDT 2019; root:xnu-4903.270.47~4/RELEASE_X86_64 x86_64 +rlimit: STACK 8192k, CORE 0k, NPROC 1418, NOFILE 10240, AS infinity +load average:86.38 96.73 70.52 + +CPU:total 8 (initial active 8) (4 cores per cpu, 2 threads per core) family 6 model 158 stepping 9, cmov, cx8, fxsr, mmx, sse, sse2, sse3, ssse3, sse4.1, sse4.2, popcnt, avx, avx2, aes, clmul, erms, rtm, 3dnowpref, lzcnt, ht, tsc, tscinvbit, bmi1, bmi2, adx + +Memory: 4k page, physical 16777216k(25292k free) + +/proc/meminfo: + + +vm_info: OpenJDK 64-Bit Server VM (25.222-b10) for bsd-amd64 JRE (1.8.0_222-b10), built on Jul 17 2019 01:08:37 by "jenkins" with gcc 4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build 2336.11.00) + +time: Fri Oct 18 22:17:43 2019 +timezone: IDT +elapsed time: 449 seconds (0d 0h 7m 29s) + diff --git a/presto-main/src/main/java/com/facebook/presto/metadata/BuiltInFunctionNamespaceManager.java b/presto-main/src/main/java/com/facebook/presto/metadata/BuiltInFunctionNamespaceManager.java index 235ee37a6e117..a052f0b443ab0 100644 --- a/presto-main/src/main/java/com/facebook/presto/metadata/BuiltInFunctionNamespaceManager.java +++ b/presto-main/src/main/java/com/facebook/presto/metadata/BuiltInFunctionNamespaceManager.java @@ -61,6 +61,8 @@ import com.facebook.presto.operator.aggregation.VarianceAggregation; import com.facebook.presto.operator.aggregation.arrayagg.ArrayAggregationFunction; import com.facebook.presto.operator.aggregation.differentialentropy.DifferentialEntropyAggregation; +import com.facebook.presto.operator.aggregation.differentialmutualinformationclassification.NormalizedDifferentialMutualInformationClassificationAggregation; +import com.facebook.presto.operator.aggregation.discreteentropy.DiscreteEntropyAggregation; import com.facebook.presto.operator.aggregation.histogram.Histogram; import com.facebook.presto.operator.aggregation.multimapagg.MultimapAggregationFunction; import com.facebook.presto.operator.scalar.ArrayCardinalityFunction; @@ -445,6 +447,8 @@ public BuiltInFunctionNamespaceManager( .aggregates(IntervalDayToSecondAverageAggregation.class) .aggregates(IntervalYearToMonthAverageAggregation.class) .aggregates(DifferentialEntropyAggregation.class) + .aggregates(NormalizedDifferentialMutualInformationClassificationAggregation.class) + .aggregates(DiscreteEntropyAggregation.class) .aggregates(EntropyAggregation.class) .aggregates(GeometricMeanAggregations.class) .aggregates(RealGeometricMeanAggregations.class) diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/DifferentialEntropyAggregation.java 
b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/DifferentialEntropyAggregation.java index 0c9218c1c6032..3aaa7aa46b9ae 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/DifferentialEntropyAggregation.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/DifferentialEntropyAggregation.java @@ -13,7 +13,6 @@ */ package com.facebook.presto.operator.aggregation.differentialentropy; -import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.block.BlockBuilder; import com.facebook.presto.spi.function.AggregationFunction; import com.facebook.presto.spi.function.AggregationState; @@ -23,24 +22,15 @@ import com.facebook.presto.spi.function.OutputFunction; import com.facebook.presto.spi.function.SqlType; import com.facebook.presto.spi.type.StandardTypes; -import com.google.common.annotations.VisibleForTesting; import io.airlift.slice.Slice; -import static com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; import static com.facebook.presto.spi.type.DoubleType.DOUBLE; -import static com.google.common.base.Verify.verify; -import static java.lang.String.format; import static java.util.Locale.ENGLISH; @AggregationFunction("differential_entropy") @Description("Computes differential entropy based on random-variable samples") public final class DifferentialEntropyAggregation { - @VisibleForTesting - public static final String FIXED_HISTOGRAM_MLE_METHOD_NAME = "fixed_histogram_mle"; - @VisibleForTesting - public static final String FIXED_HISTOGRAM_JACKNIFE_METHOD_NAME = "fixed_histogram_jacknife"; - private DifferentialEntropyAggregation() {} @InputFunction @@ -53,38 +43,15 @@ public static void input( @SqlType(StandardTypes.DOUBLE) double min, @SqlType(StandardTypes.DOUBLE) double max) { - String requestedMethod = method.toStringUtf8().toLowerCase(ENGLISH); - DifferentialEntropyStateStrategy strategy = 
state.getStrategy(); - if (strategy == null) { - switch (requestedMethod) { - case FIXED_HISTOGRAM_MLE_METHOD_NAME: - strategy = new FixedHistogramMleStateStrategy(size, min, max); - break; - case FIXED_HISTOGRAM_JACKNIFE_METHOD_NAME: - strategy = new FixedHistogramJacknifeStateStrategy(size, min, max); - break; - default: - throw new PrestoException( - INVALID_FUNCTION_ARGUMENT, - format("In differential_entropy UDF, invalid method: %s", requestedMethod)); - } - state.setStrategy(strategy); - } - else { - switch (requestedMethod.toLowerCase(ENGLISH)) { - case FIXED_HISTOGRAM_MLE_METHOD_NAME: - verify(strategy instanceof FixedHistogramMleStateStrategy, - format("Strategy class is not compatible with entropy method: %s %s", strategy.getClass().getSimpleName(), method)); - break; - case FIXED_HISTOGRAM_JACKNIFE_METHOD_NAME: - verify(strategy instanceof FixedHistogramJacknifeStateStrategy, - format("Strategy class is not compatible with entropy method: %s %s", strategy.getClass().getSimpleName(), method)); - break; - default: - verify(false, format("Unknown entropy method %s", method)); - } - } - strategy.validateParameters(size, sample, weight, min, max); + DifferentialEntropyStateStrategy strategy = DifferentialEntropyStateStrategy.getStrategy( + state.getStrategy(), + size, + sample, + weight, + method.toStringUtf8().toLowerCase(ENGLISH), + min, + max); + state.setStrategy(strategy); strategy.add(sample, weight); } @@ -95,16 +62,12 @@ public static void input( @SqlType(StandardTypes.DOUBLE) double sample, @SqlType(StandardTypes.DOUBLE) double weight) { - DifferentialEntropyStateStrategy strategy = state.getStrategy(); - if (state.getStrategy() == null) { - strategy = new WeightedReservoirSampleStateStrategy(size); - state.setStrategy(strategy); - } - else { - verify(strategy instanceof WeightedReservoirSampleStateStrategy, - format("Expected WeightedReservoirSampleStateStrategy, got: %s", strategy.getClass().getSimpleName())); - } - 
strategy.validateParameters(size, sample, weight); + DifferentialEntropyStateStrategy strategy = DifferentialEntropyStateStrategy.getStrategy( + state.getStrategy(), + size, + sample, + weight); + state.setStrategy(strategy); strategy.add(sample, weight); } @@ -114,17 +77,12 @@ public static void input( @SqlType(StandardTypes.BIGINT) long size, @SqlType(StandardTypes.DOUBLE) double sample) { - DifferentialEntropyStateStrategy strategy = state.getStrategy(); - if (state.getStrategy() == null) { - strategy = new UnweightedReservoirSampleStateStrategy(size); - state.setStrategy(strategy); - } - else { - verify(strategy instanceof UnweightedReservoirSampleStateStrategy, - format("Expected UnweightedReservoirSampleStateStrategy, got: %s", strategy.getClass().getSimpleName())); - } - strategy.validateParameters(size, sample); - strategy.add(sample, 1.0); + DifferentialEntropyStateStrategy strategy = DifferentialEntropyStateStrategy.getStrategy( + state.getStrategy(), + size, + sample); + state.setStrategy(strategy); + strategy.add(sample); } @CombineFunction @@ -141,11 +99,7 @@ public static void combine( if (otherStrategy == null) { return; } - - verify(strategy.getClass() == otherStrategy.getClass(), - format("In combine, %s != %s", strategy.getClass().getSimpleName(), otherStrategy.getClass().getSimpleName())); - - strategy.mergeWith(otherStrategy); + DifferentialEntropyStateStrategy.combine(strategy, otherStrategy); } @OutputFunction("double") diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/DifferentialEntropyStateFactory.java b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/DifferentialEntropyStateFactory.java index b44b1e8f877cf..198abc9a1977e 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/DifferentialEntropyStateFactory.java +++ 
b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/DifferentialEntropyStateFactory.java @@ -104,9 +104,6 @@ public DifferentialEntropyStateStrategy getStrategy() @Override public long getEstimatedSize() { - if (strategy == null) { - return 0; - } return strategy.getEstimatedSize(); } } diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/DifferentialEntropyStateSerializer.java b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/DifferentialEntropyStateSerializer.java index 377df8c65da74..d07ec5a5b4c77 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/DifferentialEntropyStateSerializer.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/DifferentialEntropyStateSerializer.java @@ -17,14 +17,11 @@ import com.facebook.presto.spi.block.BlockBuilder; import com.facebook.presto.spi.function.AccumulatorStateSerializer; import com.facebook.presto.spi.type.Type; -import io.airlift.slice.SizeOf; import io.airlift.slice.SliceInput; import io.airlift.slice.SliceOutput; import io.airlift.slice.Slices; import static com.facebook.presto.spi.type.VarbinaryType.VARBINARY; -import static com.google.common.base.Verify.verify; -import static java.lang.String.format; public class DifferentialEntropyStateSerializer implements AccumulatorStateSerializer @@ -39,36 +36,9 @@ public Type getSerializedType() public void serialize(DifferentialEntropyState state, BlockBuilder output) { DifferentialEntropyStateStrategy strategy = state.getStrategy(); - - if (strategy == null) { - SliceOutput sliceOut = Slices.allocate(SizeOf.SIZE_OF_INT).getOutput(); - sliceOut.appendInt(0); - VARBINARY.writeSlice(output, sliceOut.getUnderlyingSlice()); - return; - } - - int requiredBytes = SizeOf.SIZE_OF_INT + // Method - strategy.getRequiredBytesForSerialization(); // stateStrategy; - + int 
requiredBytes = DifferentialEntropyStateStrategy.getRequiredBytesForSerialization(strategy); SliceOutput sliceOut = Slices.allocate(requiredBytes).getOutput(); - - if (strategy instanceof UnweightedReservoirSampleStateStrategy) { - sliceOut.appendInt(1); - } - else if (strategy instanceof WeightedReservoirSampleStateStrategy) { - sliceOut.appendInt(2); - } - else if (strategy instanceof FixedHistogramMleStateStrategy) { - sliceOut.appendInt(3); - } - else if (strategy instanceof FixedHistogramJacknifeStateStrategy) { - sliceOut.appendInt(4); - } - else { - verify(false, format("Strategy cannot be serialized: %s", strategy.getClass().getSimpleName())); - } - - strategy.serialize(sliceOut); + DifferentialEntropyStateStrategy.serialize(strategy, sliceOut); VARBINARY.writeSlice(output, sliceOut.getUnderlyingSlice()); } @@ -79,25 +49,9 @@ public void deserialize( DifferentialEntropyState state) { SliceInput input = VARBINARY.getSlice(block, index).getInput(); - int method = input.readInt(); - switch (method) { - case 0: - verify(state.getStrategy() == null, "strategy is not null for null method"); - return; - case 1: - state.setStrategy(UnweightedReservoirSampleStateStrategy.deserialize(input)); - return; - case 2: - state.setStrategy(WeightedReservoirSampleStateStrategy.deserialize(input)); - return; - case 3: - state.setStrategy(FixedHistogramMleStateStrategy.deserialize(input)); - return; - case 4: - state.setStrategy(FixedHistogramJacknifeStateStrategy.deserialize(input)); - return; - default: - verify(false, format("Unknown method code when deserializing: %s", method)); + DifferentialEntropyStateStrategy strategy = DifferentialEntropyStateStrategy.deserialize(input); + if (strategy != null) { + state.setStrategy(strategy); } } } diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/DifferentialEntropyStateStrategy.java 
b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/DifferentialEntropyStateStrategy.java index 3288b7a731dc6..957952fd960cb 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/DifferentialEntropyStateStrategy.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/DifferentialEntropyStateStrategy.java @@ -13,8 +13,14 @@ */ package com.facebook.presto.operator.aggregation.differentialentropy; +import com.facebook.presto.spi.PrestoException; +import com.google.common.annotations.VisibleForTesting; +import io.airlift.slice.SizeOf; +import io.airlift.slice.SliceInput; import io.airlift.slice.SliceOutput; +import static com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; +import static com.google.common.base.Verify.verify; import static java.lang.String.format; /** @@ -25,13 +31,114 @@ public interface DifferentialEntropyStateStrategy extends Cloneable { - void add(double sample, double weight); + @VisibleForTesting + String FIXED_HISTOGRAM_MLE_METHOD_NAME = "fixed_histogram_mle"; + @VisibleForTesting + String FIXED_HISTOGRAM_JACKNIFE_METHOD_NAME = "fixed_histogram_jacknife"; + + static DifferentialEntropyStateStrategy getStrategy( + DifferentialEntropyStateStrategy strategy, + long size, + double sample, + double weight, + String method, + double min, + double max) + { + if (strategy == null) { + switch (method) { + case DifferentialEntropyStateStrategy.FIXED_HISTOGRAM_MLE_METHOD_NAME: + strategy = new FixedHistogramMleStateStrategy(size, min, max); + break; + case DifferentialEntropyStateStrategy.FIXED_HISTOGRAM_JACKNIFE_METHOD_NAME: + strategy = new FixedHistogramJacknifeStateStrategy(size, min, max); + break; + default: + throw new PrestoException( + INVALID_FUNCTION_ARGUMENT, + format("In differential_entropy UDF, invalid method: %s", method)); + } + } + else { + switch (method) { + case 
DifferentialEntropyStateStrategy.FIXED_HISTOGRAM_MLE_METHOD_NAME: + if (!(strategy instanceof FixedHistogramMleStateStrategy)) { + throw new PrestoException( + INVALID_FUNCTION_ARGUMENT, + format("In differential_entropy, strategy class is not compatible with entropy method: %s %s", strategy.getClass().getSimpleName(), method)); + } + break; + case DifferentialEntropyStateStrategy.FIXED_HISTOGRAM_JACKNIFE_METHOD_NAME: + if (!(strategy instanceof FixedHistogramJacknifeStateStrategy)) { + throw new PrestoException( + INVALID_FUNCTION_ARGUMENT, + format("In differential_entropy, strategy class is not compatible with entropy method: %s %s", strategy.getClass().getSimpleName(), method)); + } + break; + default: + throw new PrestoException( + INVALID_FUNCTION_ARGUMENT, + format("In differential_entropy, unknown entropy method: %s", method)); + } + } + strategy.validateParameters(size, sample, weight, min, max); + return strategy; + } + + static DifferentialEntropyStateStrategy getStrategy( + DifferentialEntropyStateStrategy strategy, + long size, + double sample, + double weight) + { + if (strategy == null) { + strategy = new WeightedReservoirSampleStateStrategy(size); + } + else { + verify(strategy instanceof WeightedReservoirSampleStateStrategy, + format("In differential entropy, expected WeightedReservoirSampleStateStrategy, got: %s", strategy.getClass().getSimpleName())); + } + strategy.validateParameters(size, sample, weight); + return strategy; + } + + static DifferentialEntropyStateStrategy getStrategy( + DifferentialEntropyStateStrategy strategy, + long size, + double sample) + { + if (strategy == null) { + strategy = new UnweightedReservoirSampleStateStrategy(size); + } + else { + verify(strategy instanceof UnweightedReservoirSampleStateStrategy, + format("In differential entropy, expected UnweightedReservoirSampleStateStrategy, got: %s", strategy.getClass().getSimpleName())); + } + return strategy; + } + + default void add(double sample) + { + verify(false, 
format("Unweighted unsupported for type: %s", getClass().getSimpleName())); + } + + default void add(double sample, double weight) + { + verify(false, format("Weighted unsupported for type: %s", getClass().getSimpleName())); + } double calculateEntropy(); long getEstimatedSize(); - int getRequiredBytesForSerialization(); + static int getRequiredBytesForSerialization(DifferentialEntropyStateStrategy strategy) + { + return SizeOf.SIZE_OF_INT + // magic hash + SizeOf.SIZE_OF_INT + // method + (strategy == null ? 0 : strategy.getRequiredBytesForSpecificSerialization()); + } + + int getRequiredBytesForSpecificSerialization(); void serialize(SliceOutput out); @@ -39,21 +146,85 @@ public interface DifferentialEntropyStateStrategy DifferentialEntropyStateStrategy clone(); - default void validateParameters(long bucketCount, double sample, double weight, double min, double max) + default void validateParameters(long size, double sample, double weight, double min, double max) { throw new UnsupportedOperationException( format("In differential_entropy UDF, unsupported arguments for type: %s", getClass().getSimpleName())); } - default void validateParameters(long bucketCount, double sample, double weight) + default void validateParameters(long size, double sample, double weight) { throw new UnsupportedOperationException( format("In differential_entropy UDF, unsupported arguments for type: %s", getClass().getSimpleName())); } - default void validateParameters(long bucketCount, double sample) + default void validateParameters(long size, double sample) { throw new UnsupportedOperationException( format("In differential_entropy UDF, unsupported arguments for type: %s", getClass().getSimpleName())); } + + static void serialize(DifferentialEntropyStateStrategy strategy, SliceOutput sliceOut) + { + sliceOut.appendInt(DifferentialEntropyStateStrategy.class.getSimpleName().hashCode()); + if (strategy == null) { + sliceOut.appendInt(0); + return; + } + + if (strategy instanceof 
UnweightedReservoirSampleStateStrategy) { + sliceOut.appendInt(1); + } + else if (strategy instanceof WeightedReservoirSampleStateStrategy) { + sliceOut.appendInt(2); + } + else if (strategy instanceof FixedHistogramMleStateStrategy) { + sliceOut.appendInt(3); + } + else if (strategy instanceof FixedHistogramJacknifeStateStrategy) { + sliceOut.appendInt(4); + } + else { + verify(false, format("Strategy cannot be serialized: %s", strategy.getClass().getSimpleName())); + } + + strategy.serialize(sliceOut); + } + + static DifferentialEntropyStateStrategy deserialize(SliceInput input) + { + verify( + input.readInt() == DifferentialEntropyStateStrategy.class.getSimpleName().hashCode(), + "magic failed"); + int method = input.readInt(); + switch (method) { + case 0: + return null; + case 1: + return UnweightedReservoirSampleStateStrategy.deserialize(input); + case 2: + return WeightedReservoirSampleStateStrategy.deserialize(input); + case 3: + return FixedHistogramMleStateStrategy.deserialize(input); + case 4: + return FixedHistogramJacknifeStateStrategy.deserialize(input); + default: + verify(false, format("In differential_entropy UDF, Unknown method code when deserializing: %s", method)); + return null; + } + } + + static void combine( + DifferentialEntropyStateStrategy strategy, + DifferentialEntropyStateStrategy otherStrategy) + { + verify(strategy.getClass() == otherStrategy.getClass(), + format("In combine, %s != %s", strategy.getClass().getSimpleName(), otherStrategy.getClass().getSimpleName())); + + strategy.mergeWith(otherStrategy); + } + + DifferentialEntropyStateStrategy cloneEmpty(); + + double getTotalPopulationWeight(); } diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/EntropyCalculations.java b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/EntropyCalculations.java index bac556c9b0793..479a6b99d7ee6 100644 --- 
a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/EntropyCalculations.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/EntropyCalculations.java @@ -15,6 +15,7 @@ import java.util.Arrays; +import static com.google.common.base.Verify.verify; import static java.lang.Math.toIntExact; public class EntropyCalculations @@ -24,7 +25,7 @@ private EntropyCalculations() {} /** * @implNote Based on Alizadeh Noughabi, Hadi & Arghami, N. (2010). "A New Estimator of Entropy". */ - public static double calculateFromSamples(double[] samples) + public static double calculateFromSamplesUsingVasicek(double[] samples) { if (samples.length == 0) { return Double.NaN; @@ -42,4 +43,10 @@ public static double calculateFromSamples(double[] samples) } return entropy / n / Math.log(2); } + + static double calculateEntropyFromHistogramAggregates(double width, double sumWeight, double sumWeightLogWeight) + { + verify(sumWeight > 0.0); + return Math.max((Math.log(width * sumWeight) - sumWeightLogWeight / sumWeight) / Math.log(2.0), 0.0); + } } diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/FixedHistogramJacknifeStateStrategy.java b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/FixedHistogramJacknifeStateStrategy.java index c9eb31f0fa3fe..64aea7a3bfb5f 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/FixedHistogramJacknifeStateStrategy.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/FixedHistogramJacknifeStateStrategy.java @@ -20,7 +20,6 @@ import java.util.Map; import static com.facebook.presto.operator.aggregation.differentialentropy.FixedHistogramStateStrategyUtils.getXLogX; -import static com.google.common.base.Verify.verify; import static com.google.common.collect.Streams.stream; import static 
java.lang.Math.toIntExact; import static java.util.stream.Collectors.groupingBy; @@ -81,6 +80,14 @@ public void add(double value, double weight) histogram.add(value, weight); } + @Override + public double getTotalPopulationWeight() + { + return stream(histogram.iterator()) + .mapToDouble(FixedDoubleBreakdownHistogram.Bucket::getWeight) + .sum(); + } + @Override public double calculateEntropy() { @@ -98,7 +105,7 @@ public double calculateEntropy() double sumWeightLogWeight = bucketWeights.values().stream().mapToDouble(w -> w == 0.0 ? 0.0 : w * Math.log(w)).sum(); - double entropy = n * calculateEntropy(histogram.getWidth(), sumWeight, sumWeightLogWeight); + double entropy = n * EntropyCalculations.calculateEntropyFromHistogramAggregates(histogram.getWidth(), sumWeight, sumWeightLogWeight); for (FixedDoubleBreakdownHistogram.Bucket bucketWeight : histogram) { double weight = bucketWeights.get(bucketWeight.getLeft()); if (weight > 0.0) { @@ -130,17 +137,11 @@ private static double getHoldOutEntropy( double holdoutSumWeightLogWeight = sumWeightLogWeight - getXLogX(bucketWeight) + getXLogX(holdoutBucketWeight); double holdoutEntropy = entryMultiplicity * (n - 1) * - calculateEntropy(width, holdoutSumWeight, holdoutSumWeightLogWeight) / + EntropyCalculations.calculateEntropyFromHistogramAggregates(width, holdoutSumWeight, holdoutSumWeightLogWeight) / n; return holdoutEntropy; } - private static double calculateEntropy(double width, double sumWeight, double sumWeightLogWeight) - { - verify(sumWeight > 0.0); - return Math.max((Math.log(width * sumWeight) - sumWeightLogWeight / sumWeight) / Math.log(2.0), 0.0); - } - @Override public long getEstimatedSize() { @@ -148,7 +149,7 @@ public long getEstimatedSize() } @Override - public int getRequiredBytesForSerialization() + public int getRequiredBytesForSpecificSerialization() { return histogram.getRequiredBytesForSerialization(); } @@ -170,4 +171,10 @@ public DifferentialEntropyStateStrategy clone() { return new 
FixedHistogramJacknifeStateStrategy(this); } + + @Override + public DifferentialEntropyStateStrategy cloneEmpty() + { + return new FixedHistogramJacknifeStateStrategy(histogram.getBucketCount(), histogram.getMin(), histogram.getMax()); + } } diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/FixedHistogramMleStateStrategy.java b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/FixedHistogramMleStateStrategy.java index 049221ca936f1..6d3cab4a6909d 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/FixedHistogramMleStateStrategy.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/FixedHistogramMleStateStrategy.java @@ -18,6 +18,7 @@ import io.airlift.slice.SliceOutput; import static com.facebook.presto.operator.aggregation.differentialentropy.FixedHistogramStateStrategyUtils.getXLogX; +import static com.google.common.collect.Streams.stream; import static java.lang.Math.toIntExact; import static java.util.Objects.requireNonNull; @@ -74,6 +75,14 @@ public void add(double sample, double weight) histogram.add(sample, weight); } + // Tmp Ami @Override + public double getTotalPopulationWeight() + { + return stream(histogram.iterator()) + .mapToDouble(FixedDoubleHistogram.Bucket::getWeight) + .sum(); + } + @Override public double calculateEntropy() { @@ -99,7 +108,7 @@ public long getEstimatedSize() } @Override - public int getRequiredBytesForSerialization() + public int getRequiredBytesForSpecificSerialization() { return histogram.getRequiredBytesForSerialization(); } @@ -128,4 +137,10 @@ public DifferentialEntropyStateStrategy clone() { return new FixedHistogramMleStateStrategy(this); } + + @Override + public DifferentialEntropyStateStrategy cloneEmpty() + { + return new FixedHistogramMleStateStrategy(histogram.getBucketCount(), histogram.getMin(), histogram.getMax()); + } } diff --git 
a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/UnweightedReservoirSampleStateStrategy.java b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/UnweightedReservoirSampleStateStrategy.java index 8118adfdafe48..a15c0eb841076 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/UnweightedReservoirSampleStateStrategy.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/UnweightedReservoirSampleStateStrategy.java @@ -18,7 +18,7 @@ import io.airlift.slice.SliceInput; import io.airlift.slice.SliceOutput; -import static com.facebook.presto.operator.aggregation.differentialentropy.EntropyCalculations.calculateFromSamples; +import static com.facebook.presto.operator.aggregation.differentialentropy.EntropyCalculations.calculateFromSamplesUsingVasicek; import static com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; import static java.lang.Math.toIntExact; import static java.lang.String.format; @@ -87,20 +87,21 @@ public void mergeWith(DifferentialEntropyStateStrategy other) } @Override - public void add(double value, double weight) + public void add(double value) { - if (weight != 1.0) { - throw new PrestoException( - INVALID_FUNCTION_ARGUMENT, - format("In differential_entropy UDF, weight should be 1.0: %s", weight)); - } reservoir.add(value); } + @Override + public double getTotalPopulationWeight() + { + return (double) reservoir.getTotalPopulationCount(); + } + @Override public double calculateEntropy() { - return calculateFromSamples(reservoir.getSamples()); + return calculateFromSamplesUsingVasicek(reservoir.getSamples()); } @Override @@ -110,7 +111,7 @@ public long getEstimatedSize() } @Override - public int getRequiredBytesForSerialization() + public int getRequiredBytesForSpecificSerialization() { return reservoir.getRequiredBytesForSerialization(); } @@ -131,4 +132,10 @@ public 
DifferentialEntropyStateStrategy clone() { return new UnweightedReservoirSampleStateStrategy(this); } + + @Override + public DifferentialEntropyStateStrategy cloneEmpty() + { + return new UnweightedReservoirSampleStateStrategy(reservoir.getMaxSamples()); + } } diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/WeightedReservoirSampleStateStrategy.java b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/WeightedReservoirSampleStateStrategy.java index 0ce2d14536f29..e65b81ef845df 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/WeightedReservoirSampleStateStrategy.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialentropy/WeightedReservoirSampleStateStrategy.java @@ -18,7 +18,7 @@ import io.airlift.slice.SliceInput; import io.airlift.slice.SliceOutput; -import static com.facebook.presto.operator.aggregation.differentialentropy.EntropyCalculations.calculateFromSamples; +import static com.facebook.presto.operator.aggregation.differentialentropy.EntropyCalculations.calculateFromSamplesUsingVasicek; import static com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; import static com.google.common.base.Verify.verify; import static java.lang.String.format; @@ -85,10 +85,16 @@ public void add(double value, double weight) reservoir.add(value, weight); } + @Override + public double getTotalPopulationWeight() + { + return reservoir.getTotalPopulationWeight(); + } + @Override public double calculateEntropy() { - return calculateFromSamples(reservoir.getSamples()); + return calculateFromSamplesUsingVasicek(reservoir.getSamples()); } @Override @@ -98,7 +104,7 @@ public long getEstimatedSize() } @Override - public int getRequiredBytesForSerialization() + public int getRequiredBytesForSpecificSerialization() { return reservoir.getRequiredBytesForSerialization(); } @@ -119,4 +125,10 @@ public 
DifferentialEntropyStateStrategy clone() { return new WeightedReservoirSampleStateStrategy(this); } + + @Override + public DifferentialEntropyStateStrategy cloneEmpty() + { + return new WeightedReservoirSampleStateStrategy(reservoir.getMaxSamples()); + } } diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/DifferentialMutualInformationClassificationState.java b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/DifferentialMutualInformationClassificationState.java new file mode 100644 index 0000000000000..12da2b268b4c5 --- /dev/null +++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/DifferentialMutualInformationClassificationState.java @@ -0,0 +1,45 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package com.facebook.presto.operator.aggregation.differentialmutualinformationclassification;
+
+import com.facebook.presto.operator.aggregation.differentialentropy.DifferentialEntropyStateStrategy;
+import com.facebook.presto.operator.aggregation.discreteentropy.DiscreteEntropyStateStrategy;
+import com.facebook.presto.spi.function.AccumulatorState;
+import com.facebook.presto.spi.function.AccumulatorStateMetadata;
+
+import java.util.Map;
+
+/**
+ * Accumulator state for the mutual-information-for-classification aggregation.
+ *
+ * <p>The state holds three things: one differential-entropy strategy for the feature,
+ * one discrete-entropy strategy for the outcome, and one differential-entropy strategy
+ * per outcome, keyed by the {@code int} outcome code.
+ */
+@AccumulatorStateMetadata(
+        stateSerializerClass = DifferentialMutualInformationStateSerializer.class,
+        stateFactoryClass = DifferentialMutualInformationClassificationStateFactory.class)
+public interface DifferentialMutualInformationClassificationState
+        extends AccumulatorState
+{
+    /** Stores the strategy that accumulates all feature samples. */
+    void setFeatureStrategy(DifferentialEntropyStateStrategy strategy);
+
+    /** Returns the strategy that accumulates all feature samples, or {@code null} if none was set. */
+    DifferentialEntropyStateStrategy getFeatureStrategy();
+
+    /** Stores the strategy that accumulates the outcomes. */
+    void setOutcomeStrategy(DiscreteEntropyStateStrategy strategy);
+
+    /** Returns the strategy that accumulates the outcomes, or {@code null} if none was set. */
+    DiscreteEntropyStateStrategy getOutcomeStrategy();
+
+    /** Stores the feature strategy used for samples having the given outcome. */
+    void setFeatureStrategyForOutcome(DifferentialEntropyStateStrategy strategy, int outcome);
+
+    /**
+     * Returns the feature strategy used for samples having the given outcome.
+     *
+     * <p>The implementations in {@link DifferentialMutualInformationClassificationStateFactory}
+     * create a missing entry by calling {@code prototypeFeatureStrategy.cloneEmpty()}.
+     *
+     * @param outcome the outcome code
+     * @param prototypeFeatureStrategy the strategy a missing entry is derived from
+     */
+    DifferentialEntropyStateStrategy getFeatureStrategyForOutcome(
+            int outcome, DifferentialEntropyStateStrategy prototypeFeatureStrategy);
+
+    /** Replaces the whole outcome-to-feature-strategy map. */
+    void setFeatureStrategiesForOutcomes(Map<Integer, DifferentialEntropyStateStrategy> strategies);
+
+    /** Returns the outcome-to-feature-strategy map, or {@code null} if none exists yet. */
+    Map<Integer, DifferentialEntropyStateStrategy> getFeatureStrategiesForOutcomes();
+}
diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/DifferentialMutualInformationClassificationStateFactory.java b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/DifferentialMutualInformationClassificationStateFactory.java
new file mode 100644
index 0000000000000..8e1668672eb8b
--- /dev/null
+++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/DifferentialMutualInformationClassificationStateFactory.java
@@ -0,0 +1,264 @@
+/*
+ * Licensed under the Apache
License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.operator.aggregation.differentialmutualinformationclassification;
+
+import com.facebook.presto.array.ObjectBigArray;
+import com.facebook.presto.operator.aggregation.differentialentropy.DifferentialEntropyStateStrategy;
+import com.facebook.presto.operator.aggregation.discreteentropy.DiscreteEntropyStateStrategy;
+import com.facebook.presto.operator.aggregation.state.AbstractGroupedAccumulatorState;
+import com.facebook.presto.spi.function.AccumulatorStateFactory;
+import io.airlift.slice.SizeOf;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Creates the single and grouped implementations of
+ * {@link DifferentialMutualInformationClassificationState}.
+ */
+public class DifferentialMutualInformationClassificationStateFactory
+        implements AccumulatorStateFactory<DifferentialMutualInformationClassificationState>
+{
+    @Override
+    public DifferentialMutualInformationClassificationState createSingleState()
+    {
+        return new SingleState();
+    }
+
+    @Override
+    public Class<? extends DifferentialMutualInformationClassificationState> getSingleStateClass()
+    {
+        return SingleState.class;
+    }
+
+    @Override
+    public DifferentialMutualInformationClassificationState createGroupedState()
+    {
+        return new GroupedState();
+    }
+
+    @Override
+    public Class<? extends DifferentialMutualInformationClassificationState> getGroupedStateClass()
+    {
+        return GroupedState.class;
+    }
+
+    /**
+     * State keeping one set of strategies per group id.
+     *
+     * <p>{@code size} tracks the estimated size of strategies stored through the setters.
+     * NOTE(review): strategies created lazily by {@link #getFeatureStrategyForOutcome} are not
+     * added to {@code size}, and sizes are not refreshed as strategies grow.
+     */
+    public static class GroupedState
+            extends AbstractGroupedAccumulatorState
+            implements DifferentialMutualInformationClassificationState
+    {
+        private final ObjectBigArray<DifferentialEntropyStateStrategy> featureStrategies = new ObjectBigArray<>();
+        private final ObjectBigArray<DiscreteEntropyStateStrategy> outcomeStrategies = new ObjectBigArray<>();
+        ObjectBigArray<Map<Integer, DifferentialEntropyStateStrategy>> featureStrategiesPerOutcome = new ObjectBigArray<>();
+        private long size;
+
+        @Override
+        public void ensureCapacity(long size)
+        {
+            featureStrategies.ensureCapacity(size);
+            outcomeStrategies.ensureCapacity(size);
+            featureStrategiesPerOutcome.ensureCapacity(size);
+        }
+
+        @Override
+        public void setFeatureStrategy(DifferentialEntropyStateStrategy strategy)
+        {
+            requireNonNull(strategy, "strategy is null");
+
+            DifferentialEntropyStateStrategy previous = getFeatureStrategy();
+            if (previous != null) {
+                size -= previous.getEstimatedSize();
+            }
+
+            featureStrategies.set(getGroupId(), strategy);
+            size += strategy.getEstimatedSize();
+        }
+
+        @Override
+        public DifferentialEntropyStateStrategy getFeatureStrategy()
+        {
+            return featureStrategies.get(getGroupId());
+        }
+
+        @Override
+        public void setOutcomeStrategy(DiscreteEntropyStateStrategy strategy)
+        {
+            requireNonNull(strategy, "strategy is null");
+
+            DiscreteEntropyStateStrategy previous = getOutcomeStrategy();
+            if (previous != null) {
+                size -= previous.getEstimatedSize();
+            }
+
+            outcomeStrategies.set(getGroupId(), strategy);
+            size += strategy.getEstimatedSize();
+        }
+
+        @Override
+        public DiscreteEntropyStateStrategy getOutcomeStrategy()
+        {
+            return outcomeStrategies.get(getGroupId());
+        }
+
+        @Override
+        public void setFeatureStrategyForOutcome(DifferentialEntropyStateStrategy strategy, int outcome)
+        {
+            requireNonNull(strategy, "strategy is null");
+
+            // Create the per-group map before touching it; a group that has never stored
+            // an outcome strategy has no map yet.
+            Map<Integer, DifferentialEntropyStateStrategy> strategiesForGroup = featureStrategiesPerOutcome.get(getGroupId());
+            if (strategiesForGroup == null) {
+                strategiesForGroup = new HashMap<>();
+                featureStrategiesPerOutcome.set(getGroupId(), strategiesForGroup);
+            }
+
+            DifferentialEntropyStateStrategy previous = strategiesForGroup.put(outcome, strategy);
+            if (previous != null) {
+                size -= previous.getEstimatedSize();
+            }
+            size += strategy.getEstimatedSize();
+        }
+
+        @Override
+        public DifferentialEntropyStateStrategy getFeatureStrategyForOutcome(int outcome, DifferentialEntropyStateStrategy prototypeFeatureStrategy)
+        {
+            Map<Integer, DifferentialEntropyStateStrategy> relevantOutcomeStrategies = featureStrategiesPerOutcome.get(getGroupId());
+            if (relevantOutcomeStrategies == null) {
+                relevantOutcomeStrategies = new HashMap<>();
+                featureStrategiesPerOutcome.set(getGroupId(), relevantOutcomeStrategies);
+            }
+            DifferentialEntropyStateStrategy strategy = relevantOutcomeStrategies.get(outcome);
+            if (strategy == null) {
+                // First sample for this outcome: start from an empty copy of the prototype.
+                strategy = prototypeFeatureStrategy.cloneEmpty();
+                relevantOutcomeStrategies.put(outcome, strategy);
+            }
+            return strategy;
+        }
+
+        @Override
+        public void setFeatureStrategiesForOutcomes(Map<Integer, DifferentialEntropyStateStrategy> strategies)
+        {
+            requireNonNull(strategies, "strategies is null");
+
+            Map<Integer, DifferentialEntropyStateStrategy> previous = getFeatureStrategiesForOutcomes();
+            if (previous != null) {
+                size -= getEstimatedSizeOfFeatureStrategiesForOutcomes(previous);
+            }
+
+            this.featureStrategiesPerOutcome.set(getGroupId(), strategies);
+            size += getEstimatedSizeOfFeatureStrategiesForOutcomes(strategies);
+        }
+
+        @Override
+        public Map<Integer, DifferentialEntropyStateStrategy> getFeatureStrategiesForOutcomes()
+        {
+            return featureStrategiesPerOutcome.get(getGroupId());
+        }
+
+        @Override
+        public long getEstimatedSize()
+        {
+            return size + featureStrategies.sizeOf() + outcomeStrategies.sizeOf() + featureStrategiesPerOutcome.sizeOf();
+        }
+    }
+
+    /** State for a single, ungrouped aggregation. */
+    public static class SingleState
+            implements DifferentialMutualInformationClassificationState
+    {
+        private DifferentialEntropyStateStrategy featureStrategy;
+        private DiscreteEntropyStateStrategy outcomeStrategy;
+        private Map<Integer, DifferentialEntropyStateStrategy> featureStrategiesPerOutcome;
+
+        @Override
+        public void setFeatureStrategy(DifferentialEntropyStateStrategy strategy)
+        {
+            requireNonNull(strategy, "strategy is null");
+
+            this.featureStrategy = strategy;
+        }
+
+        @Override
+        public DifferentialEntropyStateStrategy getFeatureStrategy()
+        {
+            return featureStrategy;
+        }
+
+        @Override
+        public void setOutcomeStrategy(DiscreteEntropyStateStrategy strategy)
+        {
+            requireNonNull(strategy, "strategy is null");
+
+            this.outcomeStrategy = strategy;
+        }
+
+        @Override
+        public DiscreteEntropyStateStrategy getOutcomeStrategy()
+        {
+            return outcomeStrategy;
+        }
+
+        @Override
+        public void setFeatureStrategyForOutcome(DifferentialEntropyStateStrategy strategy, int outcome)
+        {
+            requireNonNull(strategy, "strategy is null");
+
+            // The map is created lazily, so it may still be null here.
+            if (featureStrategiesPerOutcome == null) {
+                featureStrategiesPerOutcome = new HashMap<>();
+            }
+            featureStrategiesPerOutcome.put(outcome, strategy);
+        }
+
+        @Override
+        public DifferentialEntropyStateStrategy getFeatureStrategyForOutcome(
+                int outcome, DifferentialEntropyStateStrategy prototypeFeatureStrategy)
+        {
+            if (featureStrategiesPerOutcome == null) {
+                featureStrategiesPerOutcome = new HashMap<>();
+            }
+            DifferentialEntropyStateStrategy strategy = featureStrategiesPerOutcome.get(outcome);
+            if (strategy == null) {
+                // First sample for this outcome: start from an empty copy of the prototype.
+                strategy = prototypeFeatureStrategy.cloneEmpty();
+                featureStrategiesPerOutcome.put(outcome, strategy);
+            }
+            return strategy;
+        }
+
+        @Override
+        public void setFeatureStrategiesForOutcomes(Map<Integer, DifferentialEntropyStateStrategy> outcomeStrategies)
+        {
+            requireNonNull(outcomeStrategies, "outcomeStrategies is null");
+
+            this.featureStrategiesPerOutcome = outcomeStrategies;
+        }
+
+        @Override
+        public Map<Integer, DifferentialEntropyStateStrategy> getFeatureStrategiesForOutcomes()
+        {
+            return featureStrategiesPerOutcome;
+        }
+
+        @Override
+        public long getEstimatedSize()
+        {
+            // Each component is optional, so guard each one separately and accumulate in a long.
+            long size = 0;
+            if (featureStrategy != null) {
+                size += featureStrategy.getEstimatedSize();
+            }
+            if (outcomeStrategy != null) {
+                size += outcomeStrategy.getEstimatedSize();
+            }
+            size += getEstimatedSizeOfFeatureStrategiesForOutcomes(featureStrategiesPerOutcome);
+            return size;
+        }
+    }
+
+    /**
+     * Returns the estimated size of the map: one int per key plus the estimated size of each
+     * strategy. A {@code null} map counts as 0.
+     */
+    private static long getEstimatedSizeOfFeatureStrategiesForOutcomes(Map<Integer, DifferentialEntropyStateStrategy> strategies)
+    {
+        if (strategies == null) {
+            return 0;
+        }
+        return (long) strategies.size() * SizeOf.SIZE_OF_INT +
+                strategies.values().stream().mapToLong(DifferentialEntropyStateStrategy::getEstimatedSize).sum();
+    }
+}
diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/DifferentialMutualInformationStateSerializer.java
b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/DifferentialMutualInformationStateSerializer.java
new file mode 100644
index 0000000000000..38e27accf1e00
--- /dev/null
+++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/DifferentialMutualInformationStateSerializer.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.operator.aggregation.differentialmutualinformationclassification;
+
+import com.facebook.presto.operator.aggregation.differentialentropy.DifferentialEntropyStateStrategy;
+import com.facebook.presto.spi.block.Block;
+import com.facebook.presto.spi.block.BlockBuilder;
+import com.facebook.presto.spi.function.AccumulatorStateSerializer;
+import com.facebook.presto.spi.type.Type;
+import io.airlift.slice.SizeOf;
+import io.airlift.slice.SliceInput;
+import io.airlift.slice.SliceOutput;
+import io.airlift.slice.Slices;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static com.facebook.presto.spi.type.VarbinaryType.VARBINARY;
+import static com.google.common.base.Verify.verify;
+
+/**
+ * Serializes a {@link DifferentialMutualInformationClassificationState} to a VARBINARY slice.
+ *
+ * <p>Layout: the feature strategy, then an int count of per-outcome entries, then for each
+ * entry the int outcome code followed by its feature strategy.
+ *
+ * <p>NOTE(review): the outcome strategy ({@code state.getOutcomeStrategy()}) is not written by
+ * {@link #serialize} and {@link #deserialize} never calls {@code setOutcomeStrategy}, so it
+ * does not survive a round trip. Confirm whether that is intended.
+ */
+public class DifferentialMutualInformationStateSerializer
+        implements AccumulatorStateSerializer<DifferentialMutualInformationClassificationState>
+{
+    @Override
+    public Type getSerializedType()
+    {
+        return VARBINARY;
+    }
+
+    @Override
+    public void serialize(DifferentialMutualInformationClassificationState state, BlockBuilder output)
+    {
+        DifferentialEntropyStateStrategy strategy = state.getFeatureStrategy();
+        Map<Integer, DifferentialEntropyStateStrategy> strategiesForOutcomes = state.getFeatureStrategiesForOutcomes();
+
+        // Size the slice up front: feature strategy, entry count, then (outcome, strategy) pairs.
+        int requiredBytes = DifferentialEntropyStateStrategy.getRequiredBytesForSerialization(strategy);
+        requiredBytes += SizeOf.SIZE_OF_INT;
+        if (strategiesForOutcomes != null) {
+            for (DifferentialEntropyStateStrategy outcomeStrategy : strategiesForOutcomes.values()) {
+                requiredBytes += SizeOf.SIZE_OF_INT +
+                        DifferentialEntropyStateStrategy.getRequiredBytesForSerialization(outcomeStrategy);
+            }
+        }
+
+        SliceOutput sliceOut = Slices.allocate(requiredBytes).getOutput();
+        DifferentialEntropyStateStrategy.serialize(strategy, sliceOut);
+        if (strategiesForOutcomes == null) {
+            // A missing map is written as zero entries.
+            sliceOut.writeInt(0);
+        }
+        else {
+            sliceOut.writeInt(strategiesForOutcomes.size());
+            for (Map.Entry<Integer, DifferentialEntropyStateStrategy> entry : strategiesForOutcomes.entrySet()) {
+                sliceOut.writeInt(entry.getKey());
+                DifferentialEntropyStateStrategy.serialize(entry.getValue(), sliceOut);
+            }
+        }
+        VARBINARY.writeSlice(output, sliceOut.getUnderlyingSlice());
+    }
+
+    @Override
+    public void deserialize(
+            Block block,
+            int index,
+            DifferentialMutualInformationClassificationState state)
+    {
+        SliceInput input = VARBINARY.getSlice(block, index).getInput();
+        DifferentialEntropyStateStrategy strategy = DifferentialEntropyStateStrategy.deserialize(input);
+        if (strategy != null) {
+            state.setFeatureStrategy(strategy);
+        }
+        int numOutcomes = input.readInt();
+        if (numOutcomes == 0) {
+            // Zero entries: leave the state's per-outcome map untouched.
+            return;
+        }
+        Map<Integer, DifferentialEntropyStateStrategy> outcomeStrategies = new HashMap<>();
+        for (int i = 0; i < numOutcomes; ++i) {
+            int outcome = input.readInt();
+            DifferentialEntropyStateStrategy outcomeStrategy = DifferentialEntropyStateStrategy.deserialize(input);
+            DifferentialEntropyStateStrategy previous = outcomeStrategies.put(outcome, outcomeStrategy);
+            verify(previous == null, "outcomes must be unique");
+        }
+        state.setFeatureStrategiesForOutcomes(outcomeStrategies);
+    }
+}
diff --git
a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/NormalizedDifferentialMutualInformationClassificationAggregation.java b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/NormalizedDifferentialMutualInformationClassificationAggregation.java
new file mode 100644
index 0000000000000..67e597ef2dd37
--- /dev/null
+++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/NormalizedDifferentialMutualInformationClassificationAggregation.java
@@ -0,0 +1,230 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.operator.aggregation.differentialmutualinformationclassification;
+
+import com.facebook.presto.operator.aggregation.differentialentropy.DifferentialEntropyStateStrategy;
+import com.facebook.presto.operator.aggregation.discreteentropy.DiscreteEntropyStateStrategy;
+import com.facebook.presto.spi.block.BlockBuilder;
+import com.facebook.presto.spi.function.AggregationFunction;
+import com.facebook.presto.spi.function.AggregationState;
+import com.facebook.presto.spi.function.CombineFunction;
+import com.facebook.presto.spi.function.Description;
+import com.facebook.presto.spi.function.InputFunction;
+import com.facebook.presto.spi.function.OutputFunction;
+import com.facebook.presto.spi.function.SqlType;
+import com.facebook.presto.spi.type.StandardTypes;
+import io.airlift.slice.Slice;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static com.facebook.presto.spi.type.DoubleType.DOUBLE;
+import static java.util.Locale.ENGLISH;
+
+/**
+ * Aggregation computing a normalized mutual information score between a real-valued feature
+ * and a discrete outcome.
+ *
+ * <p>The result is the feature entropy, minus the population-weighted average of the feature
+ * entropy within each outcome, divided by the outcome entropy, then clamped to {@code [0, 1]}.
+ */
+@AggregationFunction("normalized_differential_mutual_information_classification")
+@Description("Computes the normalized mutual information score for classification")
+public final class NormalizedDifferentialMutualInformationClassificationAggregation
+{
+    private NormalizedDifferentialMutualInformationClassificationAggregation() {}
+
+    /**
+     * Adds a weighted sample using an explicitly named estimation method.
+     *
+     * <p>{@code size}, {@code method}, {@code min} and {@code max} are forwarded unchanged to
+     * {@code DifferentialEntropyStateStrategy.getStrategy}; {@code method} is lower-cased first.
+     */
+    @InputFunction
+    public static void input(
+            @AggregationState DifferentialMutualInformationClassificationState state,
+            @SqlType(StandardTypes.BIGINT) long size,
+            @SqlType(StandardTypes.BIGINT) long outcome,
+            @SqlType(StandardTypes.DOUBLE) double sample,
+            @SqlType(StandardTypes.DOUBLE) double weight,
+            @SqlType(StandardTypes.VARCHAR) Slice method,
+            @SqlType(StandardTypes.DOUBLE) double min,
+            @SqlType(StandardTypes.DOUBLE) double max)
+    {
+        DifferentialEntropyStateStrategy featureStrategy = DifferentialEntropyStateStrategy.getStrategy(
+                state.getFeatureStrategy(),
+                size,
+                sample,
+                weight,
+                method.toStringUtf8().toLowerCase(ENGLISH),
+                min,
+                max);
+        featureStrategy.add(sample, weight);
+        DiscreteEntropyStateStrategy outcomeStrategy = DiscreteEntropyStateStrategy.getStrategy(
+                state.getOutcomeStrategy(),
+                featureStrategy,
+                weight);
+        int effectiveOutcome = Long.hashCode(outcome);
+        outcomeStrategy.add(effectiveOutcome, weight);
+        DifferentialEntropyStateStrategy featureStrategyForOutcome = state.getFeatureStrategyForOutcome(effectiveOutcome, featureStrategy);
+        featureStrategyForOutcome.add(sample, weight);
+        state.setFeatureStrategy(featureStrategy);
+        // Store the outcome strategy too; otherwise a newly created one is lost after this row.
+        state.setOutcomeStrategy(outcomeStrategy);
+    }
+
+    /** Boolean-outcome variant; {@code true} maps to outcome 1 and {@code false} to 0. */
+    @InputFunction
+    public static void input(
+            @AggregationState DifferentialMutualInformationClassificationState state,
+            @SqlType(StandardTypes.BIGINT) long size,
+            @SqlType(StandardTypes.BOOLEAN) boolean outcome,
+            @SqlType(StandardTypes.DOUBLE) double sample,
+            @SqlType(StandardTypes.DOUBLE) double weight,
+            @SqlType(StandardTypes.VARCHAR) Slice method,
+            @SqlType(StandardTypes.DOUBLE) double min,
+            @SqlType(StandardTypes.DOUBLE) double max)
+    {
+        input(
+                state,
+                size,
+                outcome ? 1 : 0,
+                sample,
+                weight,
+                method,
+                min,
+                max);
+    }
+
+    /** Adds a weighted sample using the default strategy chosen by {@code getStrategy}. */
+    @InputFunction
+    public static void input(
+            @AggregationState DifferentialMutualInformationClassificationState state,
+            @SqlType(StandardTypes.BIGINT) long size,
+            @SqlType(StandardTypes.BIGINT) long outcome,
+            @SqlType(StandardTypes.DOUBLE) double sample,
+            @SqlType(StandardTypes.DOUBLE) double weight)
+    {
+        DifferentialEntropyStateStrategy featureStrategy = DifferentialEntropyStateStrategy.getStrategy(
+                state.getFeatureStrategy(),
+                size,
+                sample,
+                weight);
+        featureStrategy.add(sample, weight);
+        DiscreteEntropyStateStrategy outcomeStrategy = DiscreteEntropyStateStrategy.getStrategy(
+                state.getOutcomeStrategy(),
+                featureStrategy,
+                weight);
+        int effectiveOutcome = Long.hashCode(outcome);
+        // Pass the weight, matching the overload that takes a method name.
+        outcomeStrategy.add(effectiveOutcome, weight);
+        DifferentialEntropyStateStrategy featureStrategyForOutcome = state.getFeatureStrategyForOutcome(effectiveOutcome, featureStrategy);
+        featureStrategyForOutcome.add(sample, weight);
+        state.setFeatureStrategy(featureStrategy);
+        state.setOutcomeStrategy(outcomeStrategy);
+    }
+
+    /** Boolean-outcome variant; {@code true} maps to outcome 1 and {@code false} to 0. */
+    @InputFunction
+    public static void input(
+            @AggregationState DifferentialMutualInformationClassificationState state,
+            @SqlType(StandardTypes.BIGINT) long size,
+            @SqlType(StandardTypes.BOOLEAN) boolean outcome,
+            @SqlType(StandardTypes.DOUBLE) double sample,
+            @SqlType(StandardTypes.DOUBLE) double weight)
+    {
+        input(
+                state,
+                size,
+                outcome ? 1 : 0,
+                sample,
+                weight);
+    }
+
+    /** Adds an unweighted sample. */
+    @InputFunction
+    public static void input(
+            @AggregationState DifferentialMutualInformationClassificationState state,
+            @SqlType(StandardTypes.BIGINT) long size,
+            @SqlType(StandardTypes.BIGINT) long outcome,
+            @SqlType(StandardTypes.DOUBLE) double sample)
+    {
+        DifferentialEntropyStateStrategy featureStrategy = DifferentialEntropyStateStrategy.getStrategy(
+                state.getFeatureStrategy(),
+                size,
+                sample);
+        featureStrategy.add(sample);
+        DiscreteEntropyStateStrategy outcomeStrategy = DiscreteEntropyStateStrategy.getStrategy(
+                state.getOutcomeStrategy(),
+                featureStrategy);
+        int effectiveOutcome = Long.hashCode(outcome);
+        outcomeStrategy.add(effectiveOutcome);
+        DifferentialEntropyStateStrategy featureStrategyForOutcome =
+                state.getFeatureStrategyForOutcome(effectiveOutcome, featureStrategy);
+        featureStrategyForOutcome.add(sample);
+        state.setFeatureStrategy(featureStrategy);
+        state.setOutcomeStrategy(outcomeStrategy);
+    }
+
+    /** Boolean-outcome variant; {@code true} maps to outcome 1 and {@code false} to 0. */
+    @InputFunction
+    public static void input(
+            @AggregationState DifferentialMutualInformationClassificationState state,
+            @SqlType(StandardTypes.BIGINT) long size,
+            @SqlType(StandardTypes.BOOLEAN) boolean outcome,
+            @SqlType(StandardTypes.DOUBLE) double sample)
+    {
+        input(
+                state,
+                size,
+                outcome ? 1 : 0,
+                sample);
+    }
+
+    /**
+     * Merges {@code otherState} into {@code state}.
+     *
+     * <p>NOTE(review): when both states already hold an outcome strategy, the two are not
+     * merged here; no merge operation of {@code DiscreteEntropyStateStrategy} is visible in
+     * this file. Confirm how outcome counts should be combined.
+     */
+    @CombineFunction
+    public static void combine(
+            @AggregationState DifferentialMutualInformationClassificationState state,
+            @AggregationState DifferentialMutualInformationClassificationState otherState)
+    {
+        DifferentialEntropyStateStrategy otherFeatureStrategy = otherState.getFeatureStrategy();
+        if (otherFeatureStrategy == null) {
+            return;
+        }
+        DifferentialEntropyStateStrategy featureStrategy = state.getFeatureStrategy();
+        Map<Integer, DifferentialEntropyStateStrategy> otherStrategiesForOutcomes = otherState.getFeatureStrategiesForOutcomes();
+        if (featureStrategy == null) {
+            // This state is empty: take copies of everything the other state holds.
+            state.setFeatureStrategy(otherFeatureStrategy.clone());
+            if (otherStrategiesForOutcomes != null) {
+                Map<Integer, DifferentialEntropyStateStrategy> copiedStrategies = new HashMap<>();
+                for (Map.Entry<Integer, DifferentialEntropyStateStrategy> entry : otherStrategiesForOutcomes.entrySet()) {
+                    copiedStrategies.put(entry.getKey(), entry.getValue().clone());
+                }
+                state.setFeatureStrategiesForOutcomes(copiedStrategies);
+            }
+            // NOTE(review): the outcome strategy is shared by reference, not copied.
+            if (state.getOutcomeStrategy() == null && otherState.getOutcomeStrategy() != null) {
+                state.setOutcomeStrategy(otherState.getOutcomeStrategy());
+            }
+            return;
+        }
+        DifferentialEntropyStateStrategy.combine(featureStrategy, otherFeatureStrategy);
+        if (otherStrategiesForOutcomes == null) {
+            return;
+        }
+        for (Map.Entry<Integer, DifferentialEntropyStateStrategy> entry : otherStrategiesForOutcomes.entrySet()) {
+            DifferentialEntropyStateStrategy currentOutcomeStateStrategy = state.getFeatureStrategyForOutcome(entry.getKey(), featureStrategy);
+            DifferentialEntropyStateStrategy.combine(currentOutcomeStateStrategy, entry.getValue());
+        }
+    }
+
+    /**
+     * Writes the normalized mutual information, clamped to {@code [0, 1]}.
+     *
+     * <p>Writes NaN when any part of the state is missing, or when the outcome entropy, the
+     * total population weight, or the feature entropy is zero.
+     */
+    @OutputFunction("double")
+    public static void output(@AggregationState DifferentialMutualInformationClassificationState state, BlockBuilder out)
+    {
+        DiscreteEntropyStateStrategy outcomeStrategy = state.getOutcomeStrategy();
+        DifferentialEntropyStateStrategy featureStrategy = state.getFeatureStrategy();
+        Map<Integer, DifferentialEntropyStateStrategy> outcomeStrategies = state.getFeatureStrategiesForOutcomes();
+        if (outcomeStrategy == null || featureStrategy == null || outcomeStrategies == null) {
+            DOUBLE.writeDouble(out, Double.NaN);
+            return;
+        }
+        double outcomeEntropy = outcomeStrategy.calculateEntropy();
+        if (outcomeEntropy == 0.0) {
+            DOUBLE.writeDouble(out, Double.NaN);
+            return;
+        }
+        double totalPopulationWeight = outcomeStrategies.values().stream()
+                .mapToDouble(DifferentialEntropyStateStrategy::getTotalPopulationWeight)
+                .sum();
+        if (totalPopulationWeight == 0.0) {
+            DOUBLE.writeDouble(out, Double.NaN);
+            return;
+        }
+        double featureEntropy = featureStrategy.calculateEntropy();
+        if (featureEntropy == 0.0) {
+            DOUBLE.writeDouble(out, Double.NaN);
+            return;
+        }
+        // Subtract each outcome's feature entropy, weighted by its share of the population.
+        double mutualInformation = featureEntropy;
+        mutualInformation -= outcomeStrategies.values().stream()
+                .mapToDouble(featureStrategyForOutcome -> {
+                    double featureEntropyPerOutcome = featureStrategyForOutcome.calculateEntropy();
+                    return featureStrategyForOutcome.getTotalPopulationWeight() / totalPopulationWeight * featureEntropyPerOutcome;
+                })
+                .sum();
+        double normalizedMutualInformation = mutualInformation / outcomeEntropy;
+        DOUBLE.writeDouble(out, Math.min(1.0, Math.max(0.0, normalizedMutualInformation)));
+    }
+}
diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/DiscreteEntropyAggregation.java b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/DiscreteEntropyAggregation.java
new file mode 100644
index 0000000000000..e24ea0d4fea91
--- /dev/null
+++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/DiscreteEntropyAggregation.java
@@ -0,0 +1,257 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +import com.facebook.presto.spi.block.BlockBuilder; +import com.facebook.presto.spi.function.AggregationFunction; +import com.facebook.presto.spi.function.AggregationState; +import com.facebook.presto.spi.function.CombineFunction; +import com.facebook.presto.spi.function.Description; +import com.facebook.presto.spi.function.InputFunction; +import com.facebook.presto.spi.function.OutputFunction; +import com.facebook.presto.spi.function.SqlType; +import com.facebook.presto.spi.type.StandardTypes; +import io.airlift.slice.Slice; + +import static com.facebook.presto.spi.type.DoubleType.DOUBLE; +import static java.util.Locale.ENGLISH; + +@AggregationFunction("discrete_entropy") +@Description("Computes discrete entropy based on random-variable samples") +public final class DiscreteEntropyAggregation +{ + private DiscreteEntropyAggregation() {} + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.INTEGER) int sample, + @SqlType(StandardTypes.DOUBLE) double weight, + @SqlType(StandardTypes.VARCHAR) Slice method) + { + DiscreteEntropyStateStrategy strategy = DiscreteEntropyStateStrategy.getStrategy( + state.getStrategy(), + weight, + method.toStringUtf8().toLowerCase(ENGLISH)); + strategy.add(sample, weight); + state.setStrategy(strategy); + } + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.BIGINT) long sample, + @SqlType(StandardTypes.DOUBLE) double weight, + @SqlType(StandardTypes.VARCHAR) Slice method) + { + input(state, Long.valueOf(sample).hashCode(), weight, method); + } + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.BOOLEAN) boolean sample, + @SqlType(StandardTypes.DOUBLE) double weight, + @SqlType(StandardTypes.VARCHAR) Slice method) + { + input(state, sample ? 
1 : 0, weight, method); + } + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.DOUBLE) double sample, + @SqlType(StandardTypes.DOUBLE) double weight, + @SqlType(StandardTypes.VARCHAR) Slice method) + { + input(state, Double.valueOf(sample).hashCode(), weight, method); + } + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.VARCHAR) Slice sample, + @SqlType(StandardTypes.DOUBLE) double weight, + @SqlType(StandardTypes.VARCHAR) Slice method) + { + input(state, sample.toStringUtf8().hashCode(), weight, method); + } + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.INTEGER) int sample, + @SqlType(StandardTypes.VARCHAR) Slice method) + { + DiscreteEntropyStateStrategy strategy = DiscreteEntropyStateStrategy.getStrategy( + state.getStrategy(), + method.toStringUtf8().toLowerCase(ENGLISH)); + strategy.add(sample); + state.setStrategy(strategy); + } + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.BIGINT) long sample, + @SqlType(StandardTypes.VARCHAR) Slice method) + { + input(state, Long.valueOf(sample).hashCode(), method); + } + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.BOOLEAN) boolean sample, + @SqlType(StandardTypes.VARCHAR) Slice method) + { + input(state, sample ? 
1 : 0, method); + } + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.DOUBLE) double sample, + @SqlType(StandardTypes.VARCHAR) Slice method) + { + input(state, Double.valueOf(sample).hashCode(), method); + } + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.VARCHAR) Slice sample, + @SqlType(StandardTypes.VARCHAR) Slice method) + { + input(state, sample.toStringUtf8().hashCode(), method); + } + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.INTEGER) int sample, + @SqlType(StandardTypes.DOUBLE) double weight) + { + DiscreteEntropyStateStrategy strategy = DiscreteEntropyStateStrategy.getStrategy( + state.getStrategy(), + weight); + strategy.add(sample, weight); + state.setStrategy(strategy); + } + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.BIGINT) long sample, + @SqlType(StandardTypes.DOUBLE) double weight) + { + input(state, Long.valueOf(sample).hashCode(), weight); + } + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.BOOLEAN) boolean sample, + @SqlType(StandardTypes.DOUBLE) double weight) + { + input(state, sample ? 
1 : 0, weight); + } + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.DOUBLE) double sample, + @SqlType(StandardTypes.DOUBLE) double weight) + { + input(state, Double.valueOf(sample).hashCode(), weight); + } + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.VARCHAR) Slice sample, + @SqlType(StandardTypes.DOUBLE) double weight) + { + input(state, sample.toStringUtf8().hashCode(), weight); + } + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.INTEGER) int sample) + { + DiscreteEntropyStateStrategy strategy = DiscreteEntropyStateStrategy.getStrategy( + state.getStrategy()); + strategy.add(sample); + state.setStrategy(strategy); + } + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.BIGINT) long sample) + { + input(state, Long.valueOf(sample).hashCode()); + } + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.BOOLEAN) boolean sample) + { + input(state, sample ? 
1 : 0); + } + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.DOUBLE) double sample) + { + input(state, Double.valueOf(sample).hashCode()); + } + + @InputFunction + public static void input( + @AggregationState DiscreteEntropyState state, + @SqlType(StandardTypes.VARCHAR) Slice sample) + { + input(state, sample.toStringUtf8().hashCode()); + } + + @CombineFunction + public static void combine( + @AggregationState DiscreteEntropyState state, + @AggregationState DiscreteEntropyState otherState) + { + DiscreteEntropyStateStrategy strategy = state.getStrategy(); + DiscreteEntropyStateStrategy otherStrategy = otherState.getStrategy(); + if (strategy == null && otherStrategy != null) { + state.setStrategy(otherStrategy); + return; + } + if (otherStrategy == null) { + return; + } + DiscreteEntropyStateStrategy.combine(strategy, otherStrategy); + state.setStrategy(strategy); + } + + @OutputFunction("double") + public static void output(@AggregationState DiscreteEntropyState state, BlockBuilder out) + { + DiscreteEntropyStateStrategy strategy = state.getStrategy(); + double result = strategy == null ? 0.0 : strategy.calculateEntropy(); + DOUBLE.writeDouble(out, result); + } +} diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/DiscreteEntropyState.java b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/DiscreteEntropyState.java new file mode 100644 index 0000000000000..73ad14fa8335b --- /dev/null +++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/DiscreteEntropyState.java @@ -0,0 +1,28 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +import com.facebook.presto.spi.function.AccumulatorState; +import com.facebook.presto.spi.function.AccumulatorStateMetadata; + +@AccumulatorStateMetadata( + stateSerializerClass = DiscreteEntropyStateSerializer.class, + stateFactoryClass = DiscreteEntropyStateFactory.class) +public interface DiscreteEntropyState + extends AccumulatorState +{ + void setStrategy(DiscreteEntropyStateStrategy strategy); + + DiscreteEntropyStateStrategy getStrategy(); +} diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/DiscreteEntropyStateFactory.java b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/DiscreteEntropyStateFactory.java new file mode 100644 index 0000000000000..2ab7466adb4ef --- /dev/null +++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/DiscreteEntropyStateFactory.java @@ -0,0 +1,115 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +import com.facebook.presto.array.ObjectBigArray; +import com.facebook.presto.operator.aggregation.state.AbstractGroupedAccumulatorState; +import com.facebook.presto.spi.function.AccumulatorStateFactory; + +import static java.util.Objects.requireNonNull; + +public class DiscreteEntropyStateFactory + implements AccumulatorStateFactory +{ + @Override + public DiscreteEntropyState createSingleState() + { + return new SingleState(); + } + + @Override + public Class getSingleStateClass() + { + return SingleState.class; + } + + @Override + public DiscreteEntropyState createGroupedState() + { + return new GroupedState(); + } + + @Override + public Class getGroupedStateClass() + { + return GroupedState.class; + } + + public static class GroupedState + extends AbstractGroupedAccumulatorState + implements DiscreteEntropyState + { + private final ObjectBigArray strategies = new ObjectBigArray<>(); + private long size; + + @Override + public void ensureCapacity(long size) + { + strategies.ensureCapacity(size); + } + + @Override + public void setStrategy(DiscreteEntropyStateStrategy strategy) + { + DiscreteEntropyStateStrategy previous = requireNonNull(strategy, "strategy is null"); + if (previous != null) { + size -= previous.getEstimatedSize(); + } + + strategies.set(getGroupId(), strategy); + size += strategy.getEstimatedSize(); + } + + @Override + public DiscreteEntropyStateStrategy getStrategy() + { + return strategies.get(getGroupId()); + } + + @Override + public long getEstimatedSize() + { + return size + strategies.sizeOf(); + } + } + + public static class SingleState + implements DiscreteEntropyState + { + private DiscreteEntropyStateStrategy strategy; + + @Override + public void setStrategy(DiscreteEntropyStateStrategy strategy) + { + requireNonNull(strategy, "strategy is null"); + + this.strategy = strategy; + } + + @Override + public DiscreteEntropyStateStrategy getStrategy() + { + return 
strategy; + } + + @Override + public long getEstimatedSize() + { + if (strategy == null) { + return 0; + } + return strategy.getEstimatedSize(); + } + } +} diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/DiscreteEntropyStateSerializer.java b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/DiscreteEntropyStateSerializer.java new file mode 100644 index 0000000000000..4b73ca0bbda16 --- /dev/null +++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/DiscreteEntropyStateSerializer.java @@ -0,0 +1,57 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +import com.facebook.presto.spi.block.Block; +import com.facebook.presto.spi.block.BlockBuilder; +import com.facebook.presto.spi.function.AccumulatorStateSerializer; +import com.facebook.presto.spi.type.Type; +import io.airlift.slice.SliceInput; +import io.airlift.slice.SliceOutput; +import io.airlift.slice.Slices; + +import static com.facebook.presto.spi.type.VarbinaryType.VARBINARY; + +public class DiscreteEntropyStateSerializer + implements AccumulatorStateSerializer +{ + @Override + public Type getSerializedType() + { + return VARBINARY; + } + + @Override + public void serialize(DiscreteEntropyState state, BlockBuilder output) + { + DiscreteEntropyStateStrategy strategy = state.getStrategy(); + int requiredBytes = DiscreteEntropyStateStrategy.getRequiredBytesForSerialization(strategy); + SliceOutput sliceOut = Slices.allocate(requiredBytes).getOutput(); + DiscreteEntropyStateStrategy.serialize(strategy, sliceOut); + VARBINARY.writeSlice(output, sliceOut.getUnderlyingSlice()); + } + + @Override + public void deserialize( + Block block, + int index, + DiscreteEntropyState state) + { + SliceInput input = VARBINARY.getSlice(block, index).getInput(); + DiscreteEntropyStateStrategy strategy = DiscreteEntropyStateStrategy.deserialize(input); + if (strategy != null) { + state.setStrategy(strategy); + } + } +} diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/DiscreteEntropyStateStrategy.java b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/DiscreteEntropyStateStrategy.java new file mode 100644 index 0000000000000..99dbf28ca2281 --- /dev/null +++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/DiscreteEntropyStateStrategy.java @@ -0,0 +1,323 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the 
License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +import com.facebook.presto.operator.aggregation.differentialentropy.DifferentialEntropyStateStrategy; +import com.facebook.presto.operator.aggregation.differentialentropy.FixedHistogramJacknifeStateStrategy; +import com.facebook.presto.operator.aggregation.differentialentropy.FixedHistogramMleStateStrategy; +import com.facebook.presto.operator.aggregation.differentialentropy.UnweightedReservoirSampleStateStrategy; +import com.facebook.presto.operator.aggregation.differentialentropy.WeightedReservoirSampleStateStrategy; +import com.facebook.presto.spi.PrestoException; +import com.google.common.annotations.VisibleForTesting; +import io.airlift.slice.SizeOf; +import io.airlift.slice.SliceInput; +import io.airlift.slice.SliceOutput; + +import static com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; +import static com.google.common.base.Verify.verify; +import static java.lang.String.format; + +/** + * Interface for different strategies for calculating entropy: MLE (maximum likelihood + * estimator) using NumericHistogram, jacknife estimates using a fixed histogram, compressed + * counting and Renyi entropy, and so forth. 
+ */ +public interface DiscreteEntropyStateStrategy + extends Cloneable +{ + @VisibleForTesting + String MLE_METHOD_NAME = "mle"; + @VisibleForTesting + String JACKNIFE_METHOD_NAME = "jacknife"; + + static DiscreteEntropyStateStrategy getStrategy( + DiscreteEntropyStateStrategy strategy, + double weight, + String method) + { + if (strategy == null) { + switch (method) { + case DiscreteEntropyStateStrategy.MLE_METHOD_NAME: + strategy = new WeightedMleStateStrategy(); + break; + case DiscreteEntropyStateStrategy.JACKNIFE_METHOD_NAME: + strategy = new WeightedJacknifeStateStrategy(); + break; + default: + throw new PrestoException( + INVALID_FUNCTION_ARGUMENT, + format("In discrete_entropy UDF, invalid method: %s", method)); + } + } + else { + switch (method) { + case DiscreteEntropyStateStrategy.MLE_METHOD_NAME: + if (!(strategy instanceof WeightedMleStateStrategy)) { + throw new PrestoException( + INVALID_FUNCTION_ARGUMENT, + format("In differential_entropy, strategy class is not compatible with entropy method: %s %s", strategy.getClass().getSimpleName(), method)); + } + break; + case DiscreteEntropyStateStrategy.JACKNIFE_METHOD_NAME: + if (!(strategy instanceof WeightedJacknifeStateStrategy)) { + throw new PrestoException( + INVALID_FUNCTION_ARGUMENT, + format("In differential_entropy, strategy class is not compatible with entropy method: %s %s", strategy.getClass().getSimpleName(), method)); + } + break; + default: + throw new PrestoException( + INVALID_FUNCTION_ARGUMENT, + format("In differential_entropy, unknown entropy method: %s", method)); + } + } + validateWeight(weight); + return strategy; + } + + static DiscreteEntropyStateStrategy getStrategy( + DiscreteEntropyStateStrategy strategy, + double weight) + { + if (strategy == null) { + strategy = new WeightedMleStateStrategy(); + } + else { + if (!(strategy instanceof WeightedMleStateStrategy)) { + throw new PrestoException( + INVALID_FUNCTION_ARGUMENT, + format("In differential_entropy, strategy class is not 
compatible with entropy method: %s", strategy.getClass().getSimpleName())); + } + } + validateWeight(weight); + return strategy; + } + + static DiscreteEntropyStateStrategy getStrategy( + DiscreteEntropyStateStrategy strategy, + String method) + { + if (strategy == null) { + switch (method) { + case DiscreteEntropyStateStrategy.MLE_METHOD_NAME: + strategy = new UnweightedMleStateStrategy(); + break; + case DiscreteEntropyStateStrategy.JACKNIFE_METHOD_NAME: + strategy = new UnweightedJacknifeStateStrategy(); + break; + default: + throw new PrestoException( + INVALID_FUNCTION_ARGUMENT, + format("In discrete_entropy UDF, invalid method: %s", method)); + } + } + else { + switch (method) { + case DiscreteEntropyStateStrategy.MLE_METHOD_NAME: + if (!(strategy instanceof UnweightedMleStateStrategy)) { + throw new PrestoException( + INVALID_FUNCTION_ARGUMENT, + format("In differential_entropy, strategy class is not compatible with entropy method: %s %s", strategy.getClass().getSimpleName(), method)); + } + break; + case DiscreteEntropyStateStrategy.JACKNIFE_METHOD_NAME: + if (!(strategy instanceof UnweightedJacknifeStateStrategy)) { + throw new PrestoException( + INVALID_FUNCTION_ARGUMENT, + format("In differential_entropy, strategy class is not compatible with entropy method: %s %s", strategy.getClass().getSimpleName(), method)); + } + break; + default: + throw new PrestoException( + INVALID_FUNCTION_ARGUMENT, + format("In differential_entropy, unknown entropy method: %s", method)); + } + } + return strategy; + } + + static DiscreteEntropyStateStrategy getStrategy( + DiscreteEntropyStateStrategy strategy) + { + if (strategy == null) { + strategy = new UnweightedMleStateStrategy(); + } + else { + if (!(strategy instanceof UnweightedMleStateStrategy)) { + throw new PrestoException( + INVALID_FUNCTION_ARGUMENT, + format("In differential_entropy, strategy class is not compatible with entropy method: %s %s", strategy.getClass().getSimpleName())); + } + } + return strategy; + } 
+ + static DiscreteEntropyStateStrategy getStrategy( + DiscreteEntropyStateStrategy strategy, + DifferentialEntropyStateStrategy differentialStrategy) + { + if (strategy == null) { + if (differentialStrategy instanceof FixedHistogramMleStateStrategy) { + strategy = new UnweightedMleStateStrategy(); + } + else if (differentialStrategy instanceof FixedHistogramJacknifeStateStrategy) { + strategy = new UnweightedJacknifeStateStrategy(); + } + else if (differentialStrategy instanceof UnweightedReservoirSampleStateStrategy) { + strategy = new UnweightedMleStateStrategy(); + } + else { + throw new PrestoException( + INVALID_FUNCTION_ARGUMENT, + format( + "In discrete_entropy UDF, invalid differential entropy class: %s", + differentialStrategy.getClass().getSimpleName())); + } + } + return strategy; + } + + static DiscreteEntropyStateStrategy getStrategy( + DiscreteEntropyStateStrategy strategy, + DifferentialEntropyStateStrategy differentialStrategy, + double weight) + { + if (strategy == null) { + if (differentialStrategy instanceof FixedHistogramMleStateStrategy) { + strategy = new WeightedMleStateStrategy(); + } + else if (differentialStrategy instanceof FixedHistogramJacknifeStateStrategy) { + strategy = new WeightedJacknifeStateStrategy(); + } + else if (differentialStrategy instanceof WeightedReservoirSampleStateStrategy) { + strategy = new WeightedMleStateStrategy(); + } + else { + throw new PrestoException( + INVALID_FUNCTION_ARGUMENT, + format( + "In discrete_entropy UDF, invalid differential entropy class: %s", + differentialStrategy.getClass().getSimpleName())); + } + } + return strategy; + } + + default void add(int sample, double weight) + { + verify(false, format("Weighted unsupported for type: %s", getClass().getSimpleName())); + } + + default void add(int sample) + { + verify(false, format("Unweighted unsupported for type: %s", getClass().getSimpleName())); + } + + double calculateEntropy(); + + long getEstimatedSize(); + + static int 
getRequiredBytesForSerialization(DiscreteEntropyStateStrategy strategy) + { + return SizeOf.SIZE_OF_INT + // magic hash + SizeOf.SIZE_OF_INT + // method + (strategy == null ? 0 : strategy.getRequiredBytesForSpecificSerialization()); // strategy + } + + int getRequiredBytesForSpecificSerialization(); + + static void serialize(DiscreteEntropyStateStrategy strategy, SliceOutput sliceOut) + { + sliceOut.appendInt(DiscreteEntropyStateStrategy.class.getSimpleName().hashCode()); + if (strategy == null) { + sliceOut.appendInt(0); + return; + } + + if (strategy instanceof UnweightedMleStateStrategy) { + sliceOut.appendInt(1); + } + else if (strategy instanceof WeightedMleStateStrategy) { + sliceOut.appendInt(2); + } + else if (strategy instanceof UnweightedJacknifeStateStrategy) { + sliceOut.appendInt(3); + } + else if (strategy instanceof WeightedJacknifeStateStrategy) { + sliceOut.appendInt(4); + } + else { + verify(false, format("Strategy cannot be serialized: %s", strategy.getClass().getSimpleName())); + } + + strategy.serialize(sliceOut); + } + + static DiscreteEntropyStateStrategy deserialize(SliceInput input) + { + int hash = input.readInt(); + verify( + hash == DiscreteEntropyStateStrategy.class.getSimpleName().hashCode(), + "magic failed"); + int method = input.readInt(); + DiscreteEntropyStateStrategy strategy = null; + switch (method) { + case 0: + strategy = null; + break; + case 1: + strategy = UnweightedMleStateStrategy.deserialize(input); + break; + case 2: + strategy = WeightedMleStateStrategy.deserialize(input); + break; + case 3: + strategy = UnweightedJacknifeStateStrategy.deserialize(input); + break; + case 4: + strategy = WeightedJacknifeStateStrategy.deserialize(input); + break; + default: + verify( + false, + format("method unknown when deserializing: %s", method)); + } + return strategy; + } + + static void combine( + DiscreteEntropyStateStrategy strategy, + DiscreteEntropyStateStrategy otherStrategy) + { + verify(strategy.getClass() == 
otherStrategy.getClass(), + format("In combine, %s != %s", strategy.getClass().getSimpleName(), otherStrategy.getClass().getSimpleName())); + + strategy.mergeWith(otherStrategy); + } + + void serialize(SliceOutput out); + + void mergeWith(DiscreteEntropyStateStrategy other); + + DiscreteEntropyStateStrategy clone(); + + static void validateWeight(double weight) + { + if (weight < 0.0) { + throw new PrestoException( + INVALID_FUNCTION_ARGUMENT, + format("Weight cannot be negative: %s", weight)); + } + } +} diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/EntropyCalculations.java b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/EntropyCalculations.java new file mode 100644 index 0000000000000..00ea6cbb1a32f --- /dev/null +++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/EntropyCalculations.java @@ -0,0 +1,104 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Static helpers computing log-2 entropies from counts, weights, or pre-aggregated sums.
 */
public class EntropyCalculations
{
    private EntropyCalculations() {}

    /**
     * Entropy (in bits) of the distribution proportional to {@code counts}.
     * Non-positive entries contribute nothing; an all-zero or empty array yields 0.
     */
    public static double calculateEntropy(int[] counts)
    {
        double total = 0.0;
        for (int count : counts) {
            total += count;
        }
        if (total == 0) {
            return 0.0;
        }

        double nats = 0;
        for (int count : counts) {
            if (count <= 0) {
                continue;
            }
            nats += (count / total) * Math.log(total / count);
        }
        // Clamp tiny negative rounding noise, then convert natural log to log-2.
        return Math.max(nats, 0.0) / Math.log(2);
    }

    /**
     * Entropy (in bits) of the distribution proportional to {@code weights}.
     * Entries that are not strictly positive contribute nothing.
     */
    public static double calculateEntropy(double[] weights)
    {
        double total = 0.0;
        for (double weight : weights) {
            total += weight;
        }
        if (total == 0) {
            return 0.0;
        }

        double nats = 0;
        for (double weight : weights) {
            if (!(weight > 0)) {
                continue;
            }
            nats += (weight / total) * Math.log(total / weight);
        }
        return Math.max(nats, 0.0) / Math.log(2);
    }

    /**
     * Entropy (in bits) of the distribution proportional to {@code weights[i] * counts[i]}.
     * Iterates over the indices of {@code weights}.
     */
    public static double calculateEntropy(double[] weights, int[] counts)
    {
        double total = 0.0;
        for (int index = 0; index < weights.length; index++) {
            total += weights[index] * counts[index];
        }
        if (total == 0) {
            return 0.0;
        }

        double nats = 0;
        for (int index = 0; index < weights.length; index++) {
            double mass = weights[index] * counts[index];
            if (mass > 0) {
                nats += (mass / total) * Math.log(total / mass);
            }
        }
        return Math.max(nats, 0.0) / Math.log(2);
    }

    /**
     * Entropy term obtained after removing {@code entryWeight} from a bucket of weight
     * {@code bucketWeight}, scaled by {@code entryMultiplicity * (n - 1) / n}.
     *
     * @param sumW sum of all bucket weights before the removal
     * @param sumWeightLogWeight sum of {@code w * ln(w)} over all buckets before the removal
     */
    public static double getHoldOutEntropy(
            long n,
            double sumW,
            double sumWeightLogWeight,
            double bucketWeight,
            double entryWeight,
            long entryMultiplicity)
    {
        // The bucket cannot go below zero weight.
        double reducedBucket = Math.max(bucketWeight - entryWeight, 0);
        double reducedSum = sumW - bucketWeight + reducedBucket;
        double reducedSumWeightLogWeight = sumWeightLogWeight - getXLogX(bucketWeight) + getXLogX(reducedBucket);
        return entryMultiplicity * (n - 1) *
                calculateEntropyFromAggregates(reducedSum, reducedSumWeightLogWeight) / n;
    }

    // x * ln(x), with the convention that it is 0 for non-positive x.
    private static double getXLogX(double x)
    {
        if (x <= 0.0) {
            return 0.0;
        }
        return x * Math.log(x);
    }

    /**
     * Entropy (in bits) from the total weight and the sum of {@code w * ln(w)}:
     * {@code (ln(sumWeight) - sumWeightLogWeight / sumWeight) / ln(2)}, floored at 0.
     * Returns 0 when {@code sumWeight} is not positive.
     */
    public static double calculateEntropyFromAggregates(double sumWeight, double sumWeightLogWeight)
    {
        if (sumWeight <= 0) {
            return 0.0;
        }
        double bits = (Math.log(sumWeight) - sumWeightLogWeight / sumWeight) / Math.log(2.0);
        return Math.max(bits, 0.0);
    }
}
+ */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +import io.airlift.slice.SizeOf; +import io.airlift.slice.SliceInput; +import io.airlift.slice.SliceOutput; +import io.airlift.slice.Slices; +import org.openjdk.jol.info.ClassLayout; + +import java.util.Arrays; +import java.util.stream.IntStream; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; + +public class UnweightedJacknifeStateStrategy + implements DiscreteEntropyStateStrategy +{ + private static final int INSTANCE_SIZE = ClassLayout.parseClass(UnweightedJacknifeStateStrategy.class).instanceSize(); + private int[] samples; + private int[] counts; + + public UnweightedJacknifeStateStrategy() + { + samples = new int[0]; + counts = new int[0]; + } + + private UnweightedJacknifeStateStrategy(int[] samples, int[] counts) + { + this.samples = samples; + this.counts = counts; + } + + private UnweightedJacknifeStateStrategy(UnweightedJacknifeStateStrategy other) + { + this.samples = Arrays.copyOf(other.samples, other.samples.length); + this.counts = Arrays.copyOf(other.counts, other.counts.length); + } + + @Override + public void add(int sample) + { + increment(sample, 1); + } + + @Override + public double calculateEntropy() + { + double sumWeight = 0; + double sumWeightLogWeight = 0; + long n = 0; + for (int i = 0; i < counts.length; i++) { + sumWeight += counts[i]; + sumWeightLogWeight += counts[i] == 0 ? 
0 : counts[i] * Math.log(counts[i]); + n += counts[i]; + } + if (sumWeight == 0.0) { + return 0.0; + } + + double entropy = n * EntropyCalculations.calculateEntropyFromAggregates(sumWeight, sumWeightLogWeight); + for (int i = 0; i < counts.length; i++) { + if (counts[i] > 0.0) { + entropy -= EntropyCalculations.getHoldOutEntropy( + n, + sumWeight, + sumWeightLogWeight, + counts[i], + 1, + counts[i]); + } + } + return entropy; + } + + @Override + public long getEstimatedSize() + { + return INSTANCE_SIZE + + SizeOf.sizeOf(samples) + + SizeOf.sizeOf(counts); + } + + @Override + public int getRequiredBytesForSpecificSerialization() + { + return SizeOf.SIZE_OF_INT + // size + samples.length * (SizeOf.SIZE_OF_INT + SizeOf.SIZE_OF_INT); // arrays + } + + @Override + public void mergeWith(DiscreteEntropyStateStrategy other) + { + UnweightedJacknifeStateStrategy otherStrategy = (UnweightedJacknifeStateStrategy) other; + for (int i = 0; i < otherStrategy.samples.length; ++i) { + increment(otherStrategy.samples[i], otherStrategy.counts[i]); + } + } + + public static UnweightedJacknifeStateStrategy deserialize(SliceInput input) + { + int size = input.readInt(); + int[] samples = new int[size]; + input.readBytes( + Slices.wrappedIntArray(samples), + size * SizeOf.SIZE_OF_INT); + int[] counts = new int[size]; + input.readBytes( + Slices.wrappedIntArray(counts), + size * SizeOf.SIZE_OF_INT); + checkState(IntStream.range(0, samples.length - 1).noneMatch(i -> samples[i] > samples[i + 1]), "weights must be sorted"); + checkState(!Arrays.stream(counts).filter(c -> c < 0).findFirst().isPresent(), "Counts must be non-negative"); + + return new UnweightedJacknifeStateStrategy(samples, counts); + } + + @Override + public void serialize(SliceOutput out) + { + out.appendInt(samples.length); + IntStream.range(0, samples.length).forEach(i -> out.appendInt(samples[i])); + IntStream.range(0, counts.length).forEach(i -> out.appendInt(counts[i])); + } + + @Override + public 
DiscreteEntropyStateStrategy clone() + { + return new UnweightedJacknifeStateStrategy(this); + } + + private void increment(int sample, int count) + { + checkArgument(count > 0, "Count must be positive"); + int foundIndex = lowerBoundBinarySearch(sample); + if (foundIndex < samples.length && samples[foundIndex] == sample) { + counts[foundIndex] += count; + return; + } + + int[] newSamples = new int[samples.length + 1]; + System.arraycopy(samples, 0, newSamples, 0, foundIndex); + newSamples[foundIndex] = sample; + System.arraycopy(samples, foundIndex, newSamples, foundIndex + 1, samples.length - foundIndex); + samples = newSamples; + + int[] newCounts = new int[counts.length + 1]; + System.arraycopy(counts, 0, newCounts, 0, foundIndex); + newCounts[foundIndex] = count; + System.arraycopy(counts, foundIndex, newCounts, foundIndex + 1, counts.length - foundIndex); + counts = newCounts; + } + + private int lowerBoundBinarySearch(int sample) + { + int count = samples.length; + int first = 0; + while (count > 0) { + int step = count / 2; + int index = first + step; + if (samples[index] < sample) { + first = index + 1; + count -= step + 1; + } + else { + count = step; + } + } + return first; + } +} diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/UnweightedMleStateStrategy.java b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/UnweightedMleStateStrategy.java new file mode 100644 index 0000000000000..81cd36efb6c02 --- /dev/null +++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/UnweightedMleStateStrategy.java @@ -0,0 +1,161 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +import io.airlift.slice.SizeOf; +import io.airlift.slice.SliceInput; +import io.airlift.slice.SliceOutput; +import io.airlift.slice.Slices; +import org.openjdk.jol.info.ClassLayout; + +import java.util.Arrays; +import java.util.stream.IntStream; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; + +public class UnweightedMleStateStrategy + implements DiscreteEntropyStateStrategy +{ + private static final int INSTANCE_SIZE = ClassLayout.parseClass(UnweightedMleStateStrategy.class).instanceSize(); + private int[] samples; + private int[] counts; + + public UnweightedMleStateStrategy() + { + samples = new int[0]; + counts = new int[0]; + } + + private UnweightedMleStateStrategy(int[] samples, int[] counts) + { + this.samples = samples; + this.counts = counts; + } + + private UnweightedMleStateStrategy(UnweightedMleStateStrategy other) + { + this.samples = Arrays.copyOf(other.samples, other.samples.length); + this.counts = Arrays.copyOf(other.counts, other.counts.length); + } + + @Override + public void add(int sample) + { + increment(sample, 1); + } + + @Override + public double calculateEntropy() + { + return EntropyCalculations.calculateEntropy(counts); + } + + @Override + public long getEstimatedSize() + { + return INSTANCE_SIZE + + SizeOf.sizeOf(samples) + + SizeOf.sizeOf(counts); + } + + @Override + public int getRequiredBytesForSpecificSerialization() + { + int size = SizeOf.SIZE_OF_INT + // 
length + samples.length * (SizeOf.SIZE_OF_INT + SizeOf.SIZE_OF_INT); + return SizeOf.SIZE_OF_INT + // length + samples.length * (SizeOf.SIZE_OF_INT + SizeOf.SIZE_OF_INT); // arrays + } + + @Override + public void mergeWith(DiscreteEntropyStateStrategy other) + { + UnweightedMleStateStrategy otherStrategy = (UnweightedMleStateStrategy) other; + for (int i = 0; i < otherStrategy.samples.length; ++i) { + increment(otherStrategy.samples[i], otherStrategy.counts[i]); + } + } + + public static UnweightedMleStateStrategy deserialize(SliceInput input) + { + int size = input.readInt(); + int[] samples = new int[size]; + input.readBytes( + Slices.wrappedIntArray(samples), + size * SizeOf.SIZE_OF_INT); + int[] counts = new int[size]; + input.readBytes( + Slices.wrappedIntArray(counts), + size * SizeOf.SIZE_OF_INT); + checkState(IntStream.range(0, samples.length - 1).noneMatch(i -> samples[i] > samples[i + 1]), "weights must be sorted"); + checkState(!Arrays.stream(counts).filter(c -> c < 0).findFirst().isPresent(), "Counts must be non-negative"); + + return new UnweightedMleStateStrategy(samples, counts); + } + + @Override + public void serialize(SliceOutput out) + { + out.appendInt(samples.length); + IntStream.range(0, samples.length).forEach(i -> out.appendInt(samples[i])); + IntStream.range(0, counts.length).forEach(i -> out.appendInt(counts[i])); + } + + @Override + public DiscreteEntropyStateStrategy clone() + { + return new UnweightedMleStateStrategy(this); + } + + private void increment(int sample, int count) + { + checkArgument(count > 0, "Count must be positive"); + int foundIndex = lowerBoundBinarySearch(sample); + if (foundIndex < samples.length && samples[foundIndex] == sample) { + counts[foundIndex] += count; + return; + } + + int[] newSamples = new int[samples.length + 1]; + System.arraycopy(samples, 0, newSamples, 0, foundIndex); + newSamples[foundIndex] = sample; + System.arraycopy(samples, foundIndex, newSamples, foundIndex + 1, samples.length - foundIndex); 
+ samples = newSamples; + + int[] newCounts = new int[counts.length + 1]; + System.arraycopy(counts, 0, newCounts, 0, foundIndex); + newCounts[foundIndex] = count; + System.arraycopy(counts, foundIndex, newCounts, foundIndex + 1, counts.length - foundIndex); + counts = newCounts; + } + + private int lowerBoundBinarySearch(int sample) + { + int count = samples.length; + int first = 0; + while (count > 0) { + int step = count / 2; + int index = first + step; + if (samples[index] < sample) { + first = index + 1; + count -= step + 1; + } + else { + count = step; + } + } + return first; + } +} diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/WeightedJacknifeStateStrategy.java b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/WeightedJacknifeStateStrategy.java new file mode 100644 index 0000000000000..12da2cc6730b7 --- /dev/null +++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/WeightedJacknifeStateStrategy.java @@ -0,0 +1,207 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +import io.airlift.slice.SizeOf; +import io.airlift.slice.SliceInput; +import io.airlift.slice.SliceOutput; +import io.airlift.slice.Slices; +import org.openjdk.jol.info.ClassLayout; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.stream.IntStream; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; + +public class WeightedJacknifeStateStrategy + implements DiscreteEntropyStateStrategy +{ + private static final int INSTANCE_SIZE = ClassLayout.parseClass(WeightedJacknifeStateStrategy.class).instanceSize(); + private int[] samples; + private double[] weights; + private int[] counts; + + public WeightedJacknifeStateStrategy() + { + samples = new int[0]; + weights = new double[0]; + counts = new int[0]; + } + + private WeightedJacknifeStateStrategy(int[] samples, double[] weights, int[] counts) + { + this.samples = samples; + this.weights = weights; + this.counts = counts; + } + + private WeightedJacknifeStateStrategy(WeightedJacknifeStateStrategy other) + { + this.samples = Arrays.copyOf(other.samples, other.samples.length); + this.weights = Arrays.copyOf(other.weights, other.weights.length); + this.counts = Arrays.copyOf(other.counts, other.counts.length); + } + + @Override + public void add(int sample, double weight) + { + increment(sample, weight, 1); + } + + @Override + public double calculateEntropy() + { + Map bucketWeights = new HashMap<>(); + long n = 0; + for (int i = 0; i < samples.length; i++) { + bucketWeights.put(samples[i], counts[i] * weights[i] + bucketWeights.getOrDefault(samples[i], 0.0)); + n += counts[i]; + } + double sumWeight = bucketWeights.values().stream().mapToDouble(Double::doubleValue).sum(); + if (sumWeight == 0.0) { + return 0.0; + } + double sumWeightLogWeight = + bucketWeights.values().stream().mapToDouble(w -> w == 0.0 ? 
0.0 : w * Math.log(w)).sum(); + + double entropy = n * EntropyCalculations.calculateEntropyFromAggregates(sumWeight, sumWeightLogWeight); + for (int i = 0; i < samples.length; i++) { + double weight = bucketWeights.get(samples[i]); + if (weight > 0.0) { + entropy -= EntropyCalculations.getHoldOutEntropy( + n, + sumWeight, + sumWeightLogWeight, + weight, + weights[i], + counts[i]); + } + } + return entropy; + } + + @Override + public long getEstimatedSize() + { + return INSTANCE_SIZE + + SizeOf.sizeOf(samples) + + SizeOf.sizeOf(weights) + + SizeOf.sizeOf(counts); + } + + @Override + public int getRequiredBytesForSpecificSerialization() + { + return SizeOf.SIZE_OF_INT + // size + samples.length * (SizeOf.SIZE_OF_INT + SizeOf.SIZE_OF_DOUBLE + SizeOf.SIZE_OF_INT); // arrays + } + + @Override + public void mergeWith(DiscreteEntropyStateStrategy other) + { + WeightedJacknifeStateStrategy otherStrategy = (WeightedJacknifeStateStrategy) other; + for (int i = 0; i < otherStrategy.samples.length; ++i) { + checkState(otherStrategy.weights[i] >= 0, "Weights must be nonnegative"); + increment(otherStrategy.samples[i], otherStrategy.weights[i], otherStrategy.counts[i]); + } + } + + public static WeightedJacknifeStateStrategy deserialize(SliceInput input) + { + int size = input.readInt(); + int[] samples = new int[size]; + input.readBytes( + Slices.wrappedIntArray(samples), + size * SizeOf.SIZE_OF_INT); + double[] weights = new double[size]; + input.readBytes( + Slices.wrappedDoubleArray(weights), + size * SizeOf.SIZE_OF_DOUBLE); + int[] counts = new int[size]; + input.readBytes( + Slices.wrappedIntArray(counts), + size * SizeOf.SIZE_OF_INT); + + return new WeightedJacknifeStateStrategy(samples, weights, counts); + } + + @Override + public void serialize(SliceOutput out) + { + out.appendInt(samples.length); + IntStream.range(0, samples.length).forEach(i -> out.appendInt(samples[i])); + IntStream.range(0, weights.length).forEach(i -> out.appendDouble(weights[i])); + 
IntStream.range(0, counts.length).forEach(i -> out.appendInt(counts[i])); + } + + @Override + public DiscreteEntropyStateStrategy clone() + { + return new WeightedJacknifeStateStrategy(this); + } + + private void increment(int sample, double weight, int count) + { + checkArgument(weight >= 0, "Weight must be non-negative"); + checkArgument(count > 0, "Count must be positive"); + if (weight == 0.0 || count == 0) { + return; + } + + int foundIndex = lowerBoundBinarySearch(sample, weight); + if (foundIndex < samples.length && samples[foundIndex] == sample && weights[foundIndex] == weight) { + counts[foundIndex] += count; + return; + } + + int[] newSamples = new int[samples.length + 1]; + System.arraycopy(samples, 0, newSamples, 0, foundIndex); + newSamples[foundIndex] = sample; + System.arraycopy(samples, foundIndex, newSamples, foundIndex + 1, samples.length - foundIndex); + samples = newSamples; + + double[] newWeights = new double[weights.length + 1]; + System.arraycopy(weights, 0, newWeights, 0, foundIndex); + newWeights[foundIndex] = weight; + System.arraycopy(weights, foundIndex, newWeights, foundIndex + 1, weights.length - foundIndex); + weights = newWeights; + + int[] newCounts = new int[counts.length + 1]; + System.arraycopy(counts, 0, newCounts, 0, foundIndex); + newCounts[foundIndex] = count; + System.arraycopy(counts, foundIndex, newCounts, foundIndex + 1, counts.length - foundIndex); + counts = newCounts; + } + + private int lowerBoundBinarySearch(int sample, double weight) + { + int count = samples.length; + int first = 0; + while (count > 0) { + int step = count / 2; + int index = first + step; + if (samples[index] < sample || (samples[index] == sample && weights[index] < weight)) { + first = index + 1; + count -= step + 1; + } + else { + count = step; + } + } + return first; + } +} diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/WeightedMleStateStrategy.java 
b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/WeightedMleStateStrategy.java new file mode 100644 index 0000000000000..6260cf1b7c157 --- /dev/null +++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/discreteentropy/WeightedMleStateStrategy.java @@ -0,0 +1,158 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +import io.airlift.slice.SizeOf; +import io.airlift.slice.SliceInput; +import io.airlift.slice.SliceOutput; +import io.airlift.slice.Slices; +import org.openjdk.jol.info.ClassLayout; + +import java.util.Arrays; +import java.util.stream.IntStream; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; + +public class WeightedMleStateStrategy + implements DiscreteEntropyStateStrategy +{ + private static final int INSTANCE_SIZE = ClassLayout.parseClass(WeightedMleStateStrategy.class).instanceSize(); + private int[] samples; + private double[] weights; + + public WeightedMleStateStrategy() + { + samples = new int[0]; + weights = new double[0]; + } + + private WeightedMleStateStrategy(int[] samples, double[] weights) + { + this.samples = samples; + this.weights = weights; + } + + private WeightedMleStateStrategy(WeightedMleStateStrategy other) + { + this.samples = Arrays.copyOf(other.samples, other.samples.length); + this.weights = 
Arrays.copyOf(other.weights, other.weights.length); + } + + @Override + public void add(int sample, double weight) + { + increment(sample, weight); + } + + @Override + public double calculateEntropy() + { + return EntropyCalculations.calculateEntropy(weights); + } + + @Override + public long getEstimatedSize() + { + return INSTANCE_SIZE + + SizeOf.sizeOf(samples) + + SizeOf.sizeOf(weights); + } + + @Override + public int getRequiredBytesForSpecificSerialization() + { + return SizeOf.SIZE_OF_INT + // size + samples.length * (SizeOf.SIZE_OF_INT + SizeOf.SIZE_OF_DOUBLE); // arrays + } + + @Override + public void mergeWith(DiscreteEntropyStateStrategy other) + { + WeightedMleStateStrategy otherStrategy = (WeightedMleStateStrategy) other; + for (int i = 0; i < otherStrategy.samples.length; ++i) { + increment(otherStrategy.samples[i], otherStrategy.weights[i]); + } + } + + public static WeightedMleStateStrategy deserialize(SliceInput input) + { + int size = input.readInt(); + int[] samples = new int[size]; + input.readBytes( + Slices.wrappedIntArray(samples), + size * SizeOf.SIZE_OF_INT); + double[] weights = new double[size]; + input.readBytes( + Slices.wrappedDoubleArray(weights), + size * SizeOf.SIZE_OF_DOUBLE); + checkState(IntStream.range(0, samples.length - 1).noneMatch(i -> samples[i] > samples[i + 1]), "weights must be sorted"); + checkState(!Arrays.stream(weights).filter(w -> w < 0).findFirst().isPresent(), "Weights must be non-negative"); + return new WeightedMleStateStrategy(samples, weights); + } + + @Override + public void serialize(SliceOutput out) + { + out.appendInt(samples.length); + IntStream.range(0, samples.length).forEach(i -> out.appendInt(samples[i])); + IntStream.range(0, weights.length).forEach(i -> out.appendDouble(weights[i])); + } + + @Override + public DiscreteEntropyStateStrategy clone() + { + return new WeightedMleStateStrategy(this); + } + + private void increment(int sample, double weight) + { + checkArgument(weight >= 0, "Count must be 
positive"); + int foundIndex = lowerBoundBinarySearch(sample); + if (foundIndex < samples.length && samples[foundIndex] == sample) { + weights[foundIndex] += weight; + return; + } + + int[] newSamples = new int[samples.length + 1]; + System.arraycopy(samples, 0, newSamples, 0, foundIndex); + newSamples[foundIndex] = sample; + System.arraycopy(samples, foundIndex, newSamples, foundIndex + 1, samples.length - foundIndex); + samples = newSamples; + + double[] newWeights = new double[weights.length + 1]; + System.arraycopy(weights, 0, newWeights, 0, foundIndex); + newWeights[foundIndex] = weight; + System.arraycopy(weights, foundIndex, newWeights, foundIndex + 1, weights.length - foundIndex); + weights = newWeights; + } + + private int lowerBoundBinarySearch(int sample) + { + int count = samples.length; + int first = 0; + while (count > 0) { + int step = count / 2; + int index = first + step; + if (samples[index] < sample) { + first = index + 1; + count -= step + 1; + } + else { + count = step; + } + } + return first; + } +} diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/reservoirsample/UnweightedDoubleReservoirSample.java b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/reservoirsample/UnweightedDoubleReservoirSample.java index b368cc2d032f1..57d527db14a27 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/reservoirsample/UnweightedDoubleReservoirSample.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/reservoirsample/UnweightedDoubleReservoirSample.java @@ -115,6 +115,11 @@ public void mergeWith(UnweightedDoubleReservoirSample other) samples = merged; } + public int getTotalPopulationCount() + { + return seenCount; + } + @Override public UnweightedDoubleReservoirSample clone() { diff --git a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/reservoirsample/WeightedDoubleReservoirSample.java 
b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/reservoirsample/WeightedDoubleReservoirSample.java index fa24428b7f398..5d4519af7d5d2 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/aggregation/reservoirsample/WeightedDoubleReservoirSample.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/aggregation/reservoirsample/WeightedDoubleReservoirSample.java @@ -35,6 +35,7 @@ public class WeightedDoubleReservoirSample private int count; private double[] samples; private double[] weights; + private double totalPopulationWeight; public WeightedDoubleReservoirSample(int maxSamples) { @@ -52,13 +53,15 @@ private WeightedDoubleReservoirSample(WeightedDoubleReservoirSample other) this.count = other.count; this.samples = Arrays.copyOf(other.samples, other.samples.length); this.weights = Arrays.copyOf(other.weights, other.weights.length); + this.totalPopulationWeight = other.totalPopulationWeight; } - private WeightedDoubleReservoirSample(int count, double[] samples, double[] weights) + private WeightedDoubleReservoirSample(int count, double[] samples, double[] weights, double totalPopulationWeight) { this.count = count; this.samples = requireNonNull(samples, "samples is null"); this.weights = requireNonNull(weights, "weights is null"); + this.totalPopulationWeight = totalPopulationWeight; } public long getMaxSamples() @@ -69,6 +72,7 @@ public long getMaxSamples() public void add(double sample, double weight) { checkArgument(weight >= 0, format("Weight %s cannot be negative", weight)); + totalPopulationWeight += weight; double adjustedWeight = Math.pow( ThreadLocalRandom.current().nextDouble(), 1.0 / weight); @@ -79,7 +83,6 @@ private void addWithAdjustedWeight(double sample, double adjustedWeight) { if (count < samples.length) { samples[count] = sample; - weights[count] = adjustedWeight; count++; bubbleUp(); return; @@ -96,6 +99,7 @@ private void addWithAdjustedWeight(double sample, double adjustedWeight) public void 
mergeWith(WeightedDoubleReservoirSample other) { + totalPopulationWeight += other.totalPopulationWeight; for (int i = 0; i < other.count; i++) { addWithAdjustedWeight(other.samples[i], other.weights[i]); } @@ -112,12 +116,6 @@ public double[] getSamples() return Arrays.copyOf(samples, count); } - private void checkArguments() - { - checkArgument(samples.length > 0, "Number of reservoir samples must be strictly positive"); - checkArgument(count <= samples.length, "Size must be at most number of samples"); - } - private void swap(int i, int j) { double tmpElement = samples[i]; @@ -182,7 +180,8 @@ public static WeightedDoubleReservoirSample deserialize(SliceInput input) input.readBytes(Slices.wrappedDoubleArray(samples), count * SizeOf.SIZE_OF_DOUBLE); double[] weights = new double[maxSamples]; input.readBytes(Slices.wrappedDoubleArray(weights), count * SizeOf.SIZE_OF_DOUBLE); - return new WeightedDoubleReservoirSample(count, samples, weights); + double totalPopulationWeight = input.readDouble(); + return new WeightedDoubleReservoirSample(count, samples, weights, totalPopulationWeight); } public void serialize(SliceOutput output) @@ -195,12 +194,14 @@ public void serialize(SliceOutput output) for (int i = 0; i < count; i++) { output.appendDouble(weights[i]); } + output.appendDouble(totalPopulationWeight); } public int getRequiredBytesForSerialization() { return SizeOf.SIZE_OF_INT + // count - SizeOf.SIZE_OF_INT + 2 * SizeOf.SIZE_OF_DOUBLE * Math.min(count, samples.length); // samples, weights + SizeOf.SIZE_OF_INT + 2 * SizeOf.SIZE_OF_DOUBLE * Math.min(count, samples.length) + // samples, weights + SizeOf.SIZE_OF_DOUBLE; // totalPopulationWeight; } public long estimatedInMemorySize() @@ -209,4 +210,9 @@ public long estimatedInMemorySize() SizeOf.sizeOf(samples) + SizeOf.sizeOf(weights); } + + public double getTotalPopulationWeight() + { + return totalPopulationWeight; + } } diff --git 
a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/AbstractTestReservoirAggregation.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/AbstractTestReservoirAggregation.java index e2f75735375e8..e90aa2a586620 100644 --- a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/AbstractTestReservoirAggregation.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/AbstractTestReservoirAggregation.java @@ -15,7 +15,7 @@ import com.facebook.presto.operator.aggregation.AbstractTestAggregationFunction; -import static com.facebook.presto.operator.aggregation.differentialentropy.EntropyCalculations.calculateFromSamples; +import static com.facebook.presto.operator.aggregation.differentialentropy.EntropyCalculations.calculateFromSamplesUsingVasicek; import static org.testng.Assert.assertTrue; abstract class AbstractTestReservoirAggregation @@ -32,12 +32,12 @@ protected String getFunctionName() @Override public Double getExpectedValue(int start, int length) { - assertTrue(length < MAX_SAMPLES); + assertTrue(2 * length < MAX_SAMPLES); double[] samples = new double[2 * length]; for (int i = 0; i < length; i++) { samples[i] = (double) (start + i); samples[i + length] = (double) (start + i); } - return calculateFromSamples(samples); + return calculateFromSamplesUsingVasicek(samples); } } diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/AbstractTestStateStrategy.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/AbstractTestStateStrategy.java index b44fd00b5887c..c690f0d15ac37 100644 --- a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/AbstractTestStateStrategy.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/AbstractTestStateStrategy.java @@ -26,10 
+26,14 @@ abstract class AbstractTestStateStrategy protected static final double MAX = 10.0; private final Function strategySupplier; + private final boolean weighted; - protected AbstractTestStateStrategy(Function strategySupplier) + protected AbstractTestStateStrategy( + Function strategySupplier, + boolean weighted) { this.strategySupplier = strategySupplier; + this.weighted = weighted; } @Test @@ -38,7 +42,13 @@ public void testUniformDistribution() DifferentialEntropyStateStrategy strategy = strategySupplier.apply(2000); Random random = new Random(13); for (int i = 0; i < 9_999_999; i++) { - strategy.add(10 * random.nextFloat(), 1.0); + double value = 10 * random.nextFloat(); + if (weighted) { + strategy.add(value, 1.0); + } + else { + strategy.add(value); + } } double expected = Math.log(10) / Math.log(2); assertEquals(strategy.calculateEntropy(), expected, 0.1); @@ -51,7 +61,13 @@ public void testNormalDistribution() Random random = new Random(13); double sigma = 0.5; for (int i = 0; i < 9_999_999; i++) { - strategy.add(5 + sigma * random.nextGaussian(), 1.0); + double value = 5 + sigma * random.nextGaussian(); + if (weighted) { + strategy.add(value, 1.0); + } + else { + strategy.add(value); + } } double expected = 0.5 * Math.log(2 * Math.PI * Math.E * sigma * sigma) / Math.log(2); assertEquals(strategy.calculateEntropy(), expected, 0.02); diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestEntropyCalculations.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestEntropyCalculations.java index 23f502587d34e..13b8b5b090cd1 100644 --- a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestEntropyCalculations.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestEntropyCalculations.java @@ -17,7 +17,7 @@ import java.util.Random; -import static 
com.facebook.presto.operator.aggregation.differentialentropy.EntropyCalculations.calculateFromSamples; +import static com.facebook.presto.operator.aggregation.differentialentropy.EntropyCalculations.calculateFromSamplesUsingVasicek; import static org.testng.Assert.assertEquals; public class TestEntropyCalculations @@ -30,7 +30,7 @@ public void testUniformDistribution() for (int i = 0; i < samples.length; i++) { samples[i] = random.nextDouble(); } - assertEquals(calculateFromSamples(samples), 0, 0.02); + assertEquals(calculateFromSamplesUsingVasicek(samples), 0, 0.02); } @Test @@ -43,6 +43,6 @@ public void testNormalDistribution() samples[i] = 5 + sigma * random.nextGaussian(); } double expected = 0.5 * Math.log(2 * Math.PI * Math.E * sigma * sigma) / Math.log(2); - assertEquals(calculateFromSamples(samples), expected, 0.02); + assertEquals(calculateFromSamplesUsingVasicek(samples), expected, 0.02); } } diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestFixedHistogramJacknifeAggregation.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestFixedHistogramJacknifeAggregation.java index 9ded83efee00b..f6bc7f7620f1a 100644 --- a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestFixedHistogramJacknifeAggregation.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestFixedHistogramJacknifeAggregation.java @@ -29,7 +29,7 @@ public class TestFixedHistogramJacknifeAggregation { public TestFixedHistogramJacknifeAggregation() { - super(DifferentialEntropyAggregation.FIXED_HISTOGRAM_JACKNIFE_METHOD_NAME); + super(DifferentialEntropyStateStrategy.FIXED_HISTOGRAM_JACKNIFE_METHOD_NAME); } @Test( diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestFixedHistogramJacknifeStateStrategy.java 
b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestFixedHistogramJacknifeStateStrategy.java index 4fe9d3a45334f..97347d5d92197 100644 --- a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestFixedHistogramJacknifeStateStrategy.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestFixedHistogramJacknifeStateStrategy.java @@ -18,6 +18,6 @@ public class TestFixedHistogramJacknifeStateStrategy { public TestFixedHistogramJacknifeStateStrategy() { - super(size -> new FixedHistogramJacknifeStateStrategy(size, AbstractTestStateStrategy.MIN, AbstractTestStateStrategy.MAX)); + super(size -> new FixedHistogramJacknifeStateStrategy(size, AbstractTestStateStrategy.MIN, AbstractTestStateStrategy.MAX), true); } } diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestFixedHistogramMleAggregation.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestFixedHistogramMleAggregation.java index 0f2332dea7b8a..8b5fb376f166f 100644 --- a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestFixedHistogramMleAggregation.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestFixedHistogramMleAggregation.java @@ -29,7 +29,7 @@ public class TestFixedHistogramMleAggregation { public TestFixedHistogramMleAggregation() { - super(DifferentialEntropyAggregation.FIXED_HISTOGRAM_MLE_METHOD_NAME); + super(DifferentialEntropyStateStrategy.FIXED_HISTOGRAM_MLE_METHOD_NAME); } @Test( diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestFixedHistogramMleStateStrategy.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestFixedHistogramMleStateStrategy.java index abb538587298a..3075a24c1eff9 100644 --- 
a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestFixedHistogramMleStateStrategy.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestFixedHistogramMleStateStrategy.java @@ -18,6 +18,6 @@ public class TestFixedHistogramMleStateStrategy { public TestFixedHistogramMleStateStrategy() { - super(bucketCount -> new FixedHistogramMleStateStrategy(bucketCount, MIN, MAX)); + super(bucketCount -> new FixedHistogramMleStateStrategy(bucketCount, MIN, MAX), true); } } diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestIllegalMethodAggregation.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestIllegalMethodAggregation.java index 8cbc2367244dd..ba071b0686fe5 100644 --- a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestIllegalMethodAggregation.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestIllegalMethodAggregation.java @@ -59,7 +59,6 @@ public void testNullMethod() "differential_entropy", fromTypes(BIGINT, DOUBLE, DOUBLE, VARCHAR, DOUBLE, DOUBLE))); createStringsBlock((String) null); - System.out.println("foo"); aggregation( function, createLongsBlock(200), diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestUnweightedReservoirSampleStateStrategy.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestUnweightedReservoirSampleStateStrategy.java index 8f75ba282e4d4..bc0b0c7a4e634 100644 --- a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestUnweightedReservoirSampleStateStrategy.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestUnweightedReservoirSampleStateStrategy.java @@ -18,6 +18,6 @@ public class 
TestUnweightedReservoirSampleStateStrategy { public TestUnweightedReservoirSampleStateStrategy() { - super(size -> new UnweightedReservoirSampleStateStrategy(size)); + super(size -> new UnweightedReservoirSampleStateStrategy(size), false); } } diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestWeightedReservoirSampleStateStrategy.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestWeightedReservoirSampleStateStrategy.java index 61126b12513fe..e8f76b443bc00 100644 --- a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestWeightedReservoirSampleStateStrategy.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialentropy/TestWeightedReservoirSampleStateStrategy.java @@ -18,6 +18,6 @@ public class TestWeightedReservoirSampleStateStrategy { public TestWeightedReservoirSampleStateStrategy() { - super(size -> new WeightedReservoirSampleStateStrategy(size)); + super(size -> new WeightedReservoirSampleStateStrategy(size), true); } } diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/AbstractTestFixedHistogramAggregation.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/AbstractTestFixedHistogramAggregation.java new file mode 100644 index 0000000000000..53582c65eef70 --- /dev/null +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/AbstractTestFixedHistogramAggregation.java @@ -0,0 +1,115 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.operator.aggregation.differentialmutualinformationclassification; + +import com.facebook.presto.operator.aggregation.AbstractTestAggregationFunction; +import com.facebook.presto.spi.block.Block; +import com.facebook.presto.spi.block.BlockBuilder; +import com.facebook.presto.spi.type.StandardTypes; +import com.google.common.collect.ImmutableList; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static com.facebook.presto.block.BlockAssertions.createRLEBlock; +import static com.facebook.presto.spi.type.BigintType.BIGINT; +import static com.facebook.presto.spi.type.DoubleType.DOUBLE; + +abstract class AbstractTestFixedHistogramAggregation + extends AbstractTestAggregationFunction +{ + protected static final int MAX_SAMPLES = 500; + protected static final int LENGTH_FACTOR = 8; + protected static final int TRUE_FACTOR = 4; + private static final int NUM_BINS = 5; + protected final String method; + + protected AbstractTestFixedHistogramAggregation(String method) + { + this.method = method; + } + + @Override + public Block[] getSequenceBlocks(int start, int length) + { + int positionCount = LENGTH_FACTOR * length; + BlockBuilder outcomes = BIGINT.createBlockBuilder(null, positionCount); + BlockBuilder samples = DOUBLE.createBlockBuilder(null, positionCount); + BlockBuilder weights = DOUBLE.createBlockBuilder(null, positionCount); + for (int i = start; i < start + LENGTH_FACTOR * length; i++) { + BIGINT.writeLong(outcomes, i % TRUE_FACTOR == 0 ? 
0 : 1); + DOUBLE.writeDouble(samples, Math.abs(i) % NUM_BINS); + DOUBLE.writeDouble(weights, Math.abs(i) % 3 + 1); + } + + return new Block[] { + createRLEBlock(NUM_BINS, positionCount), + outcomes.build(), + samples.build(), + weights.build(), + createRLEBlock(this.method, positionCount), + createRLEBlock(0.0, positionCount), + createRLEBlock((double) NUM_BINS, positionCount) + }; + } + + @Override + protected String getFunctionName() + { + return "normalized_differential_mutual_information_classification"; + } + + @Override + protected List<String> getFunctionParameterTypes() + { + return ImmutableList.of( + StandardTypes.BIGINT, + StandardTypes.BIGINT, + StandardTypes.DOUBLE, + StandardTypes.DOUBLE, + StandardTypes.VARCHAR, + StandardTypes.DOUBLE, + StandardTypes.DOUBLE); + } + + protected static void generateOutcomesSamplesAndWeights(int start, int length, List<Integer> outcomes, List<Double> samples, List<Double> weights) + { + for (int i = start; i < start + LENGTH_FACTOR * length; i++) { + outcomes.add(i % TRUE_FACTOR == 0 ? 0 : 1); + samples.add((double) (Math.abs(i) % NUM_BINS)); + weights.add((double) (Math.abs(i) % 3 + 1)); + } + } + + protected static double calculateEntropy(List<Double> samples, List<Double> weights) + { + double totalWeight = weights.stream().mapToDouble(weight -> weight).sum(); + if (totalWeight == 0.0) { + return Double.NaN; + } + + Map<Double, Double> bucketWeights = new HashMap<>(); + for (int i = 0; i < samples.size(); i++) { + double sample = samples.get(i); + double weight = weights.get(i); + bucketWeights.put(sample, bucketWeights.getOrDefault(sample, 0.0) + weight); + } + + double entropy = bucketWeights.values().stream() + .mapToDouble(weight -> weight == 0.0 ?
0.0 : weight / totalWeight * Math.log(totalWeight / weight)) + .sum(); + return entropy / Math.log(2); + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/AbstractTestReservoirAggregation.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/AbstractTestReservoirAggregation.java new file mode 100644 index 0000000000000..6d88d2d3c7bf7 --- /dev/null +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/AbstractTestReservoirAggregation.java @@ -0,0 +1,30 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.operator.aggregation.differentialmutualinformationclassification; + +import com.facebook.presto.operator.aggregation.AbstractTestAggregationFunction; + +abstract class AbstractTestReservoirAggregation + extends AbstractTestAggregationFunction +{ + protected static final int MAX_SAMPLES = 500; + protected static final int LENGTH_FACTOR = 8; + protected static final int TRUE_FACTOR = 4; + + @Override + protected String getFunctionName() + { + return "normalized_differential_mutual_information_classification"; + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/AbstractTestStateStrategy.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/AbstractTestStateStrategy.java new file mode 100644 index 0000000000000..613b2de92ac62 --- /dev/null +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/AbstractTestStateStrategy.java @@ -0,0 +1,90 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.operator.aggregation.differentialmutualinformationclassification; + +import com.facebook.presto.operator.aggregation.differentialentropy.DifferentialEntropyStateStrategy; +import org.testng.annotations.Test; + +import java.util.Random; +import java.util.function.Function; + +import static com.google.common.base.Preconditions.checkArgument; +import static org.testng.Assert.assertEquals; + +abstract class AbstractTestStateStrategy +{ + protected static final double MIN = 0.0; + protected static final double MAX = 10.0; + + private final Function<Integer, DifferentialEntropyStateStrategy> strategySupplier; + private final boolean weighted; + + protected AbstractTestStateStrategy( + Function<Integer, DifferentialEntropyStateStrategy> strategySupplier, + boolean weighted) + { + this.strategySupplier = strategySupplier; + this.weighted = weighted; + } + + @Test + public void testUniformDistribution() + { + assertEquals(calculateUniform(1), 1.0, 0.01); + } + + private double calculateUniform(double noise) + { + checkArgument(noise >= 0 && noise <= 1); + int size = 2_000; + DifferentialEntropyStateStrategy strategy = strategySupplier.apply(size); + DifferentialEntropyStateStrategy trueStrategy = strategySupplier.apply(size); + DifferentialEntropyStateStrategy falseStrategy = strategySupplier.apply(size); + Random random = new Random(13); + for (int i = 0; i < 9_999_999; i++) { + int outcome = random.nextBoolean() ?
1 : 0; + double value = MIN + (MAX - MIN) / 2 * random.nextFloat(); + if (outcome == 1) { + value += (MAX - MIN) / 2; + } + add(strategy, value); + if (outcome == 1) { + add(trueStrategy, value); + } + else { + add(falseStrategy, value); + } + } + double entropy = strategy.calculateEntropy(); + double trueEntropy = trueStrategy.calculateEntropy(); + double falseEntropy = falseStrategy.calculateEntropy(); + double totalTrueWeight = trueStrategy.getTotalPopulationWeight(); + double totalFalseWeight = falseStrategy.getTotalPopulationWeight(); + double reduced = entropy; + reduced -= trueEntropy * (totalTrueWeight / (totalTrueWeight + totalFalseWeight)); + reduced -= falseEntropy * (totalFalseWeight / (totalTrueWeight + totalFalseWeight)); + double mutualInformation = Math.min(1.0, Math.max(reduced / entropy, 0.0)); + return mutualInformation; + } + + private void add(DifferentialEntropyStateStrategy strategy, double value) + { + if (weighted) { + strategy.add(value, 1.0); + } + else { + strategy.add(value); + } + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestEntropyCalculations.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestEntropyCalculations.java new file mode 100644 index 0000000000000..d02355fd9acbb --- /dev/null +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestEntropyCalculations.java @@ -0,0 +1,48 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.operator.aggregation.differentialmutualinformationclassification; + +import org.testng.annotations.Test; + +import java.util.Random; + +import static com.facebook.presto.operator.aggregation.differentialentropy.EntropyCalculations.calculateFromSamplesUsingVasicek; +import static org.testng.Assert.assertEquals; + +public class TestEntropyCalculations +{ + @Test + public void testUniformDistribution() + { + Random random = new Random(13); + double[] samples = new double[10000000]; + for (int i = 0; i < samples.length; i++) { + samples[i] = random.nextDouble(); + } + assertEquals(calculateFromSamplesUsingVasicek(samples), 0, 0.02); + } + + @Test + public void testNormalDistribution() + { + Random random = new Random(13); + double[] samples = new double[10000000]; + double sigma = 0.5; + for (int i = 0; i < samples.length; i++) { + samples[i] = 5 + sigma * random.nextGaussian(); + } + double expected = 0.5 * Math.log(2 * Math.PI * Math.E * sigma * sigma) / Math.log(2); + assertEquals(calculateFromSamplesUsingVasicek(samples), expected, 0.02); + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestFixedHistogramJacknifeAggregation.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestFixedHistogramJacknifeAggregation.java new file mode 100644 index 0000000000000..ce158b937aaa9 --- /dev/null +++ 
b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestFixedHistogramJacknifeAggregation.java @@ -0,0 +1,165 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.operator.aggregation.differentialmutualinformationclassification; + +import com.facebook.presto.operator.aggregation.differentialentropy.DifferentialEntropyStateStrategy; +import com.facebook.presto.spi.PrestoException; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.List; + +import static com.facebook.presto.block.BlockAssertions.createDoublesBlock; +import static com.facebook.presto.block.BlockAssertions.createLongsBlock; +import static com.facebook.presto.block.BlockAssertions.createStringsBlock; +import static com.facebook.presto.operator.aggregation.AggregationTestUtils.aggregation; + +public class TestFixedHistogramJacknifeAggregation + extends AbstractTestFixedHistogramAggregation +{ + public TestFixedHistogramJacknifeAggregation() + { + super(DifferentialEntropyStateStrategy.FIXED_HISTOGRAM_JACKNIFE_METHOD_NAME); + } + + @Test( + expectedExceptions = PrestoException.class, + expectedExceptionsMessageRegExp = "In differential_entropy UDF, bucket count must be non-negative: -200") + public void testIllegalBucketCount() + { + aggregation( + getFunction(), + createLongsBlock(-200), + createLongsBlock(1), + createDoublesBlock(0.1), + createDoublesBlock(0.2), + 
createStringsBlock(method), + createDoublesBlock(0.0), + createDoublesBlock(0.2)); + } + + @Test( + expectedExceptions = PrestoException.class, + expectedExceptionsMessageRegExp = "In differential_entropy UDF, weight must be non-negative: -0.2") + public void testNegativeWeight() + { + aggregation( + getFunction(), + createLongsBlock(200), + createLongsBlock(1), + createDoublesBlock(0.1), + createDoublesBlock(-0.2), + createStringsBlock(method), + createDoublesBlock(0.0), + createDoublesBlock(0.2)); + } + + @Test( + expectedExceptions = PrestoException.class, + expectedExceptionsMessageRegExp = "In differential_entropy UDF, sample must be at least min: sample=-100.0, min=0.0") + public void testTooSmallSample() + { + aggregation( + getFunction(), + createLongsBlock(200), + createLongsBlock(1), + createDoublesBlock(-100.0), + createDoublesBlock(0.2), + createStringsBlock(method), + createDoublesBlock(0.0), + createDoublesBlock(0.2)); + } + + @Test( + expectedExceptions = PrestoException.class, + expectedExceptionsMessageRegExp = "In differential_entropy UDF, sample must be at most max: sample=300.0, max=0.2") + public void testTooLargeSample() + { + aggregation( + getFunction(), + createLongsBlock(200), + createLongsBlock(1), + createDoublesBlock(300.0), + createDoublesBlock(0.2), + createStringsBlock(method), + createDoublesBlock(0.0), + createDoublesBlock(0.2)); + } + + @Test( + expectedExceptions = PrestoException.class, + expectedExceptionsMessageRegExp = "In differential_entropy UDF, min must be larger than max: min=0.2, max=0.1") + public void testIllegalMinMax() + { + aggregation( + getFunction(), + createLongsBlock(200), + createLongsBlock(1), + createDoublesBlock(0.1), + createDoublesBlock(0.2), + createStringsBlock(method), + createDoublesBlock(0.2), + createDoublesBlock(0.1)); + } + + @Override + public Double getExpectedValue(int start, int length) + { + List<Integer> outcomes = new ArrayList<>(); + List<Double> samples = new ArrayList<>(); + List<Double> weights = new
ArrayList<>(); + generateOutcomesSamplesAndWeights(start, length, outcomes, samples, weights); + List<Double> trueSamples = new ArrayList<>(); + List<Double> trueWeights = new ArrayList<>(); + List<Double> falseSamples = new ArrayList<>(); + List<Double> falseWeights = new ArrayList<>(); + double totalTrueWeight = 0; + double totalFalseWeight = 0; + for (int i = 0; i < samples.size(); i++) { + if (outcomes.get(i) == 1) { + totalTrueWeight += weights.get(i); + trueSamples.add(samples.get(i)); + trueWeights.add(weights.get(i)); + } + else { + totalFalseWeight += weights.get(i); + falseSamples.add(samples.get(i)); + falseWeights.add(weights.get(i)); + } + } + double entropy = calculateEntropy(samples, weights); + double reduced = entropy; + double positive = calculateEntropy(trueSamples, trueWeights); + reduced -= positive * (totalTrueWeight / (totalTrueWeight + totalFalseWeight)); + double negative = calculateEntropy(falseSamples, falseWeights); + reduced -= negative * (totalFalseWeight / (totalTrueWeight + totalFalseWeight)); + double mutualInformation = Math.min(1.0, Math.max(reduced / entropy, 0.0)); + return mutualInformation; + } + + protected static double calculateEntropy(List<Double> samples, List<Double> weights) + { + double entropy = samples.size() * AbstractTestFixedHistogramAggregation.calculateEntropy(samples, weights); + for (int i = 0; i < samples.size(); ++i) { + List<Double> subSamples = new ArrayList<>(samples); + subSamples.remove(i); + List<Double> subWeights = new ArrayList<>(weights); + subWeights.remove(i); + + double holdOutEntropy = (samples.size() - 1) * AbstractTestFixedHistogramAggregation.calculateEntropy(subSamples, subWeights) / samples.size(); + entropy -= holdOutEntropy; + } + return entropy; + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestFixedHistogramMleAggregation.java
b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestFixedHistogramMleAggregation.java new file mode 100644 index 0000000000000..9f915675a4f65 --- /dev/null +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestFixedHistogramMleAggregation.java @@ -0,0 +1,150 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.operator.aggregation.differentialmutualinformationclassification; + +import com.facebook.presto.operator.aggregation.differentialentropy.DifferentialEntropyStateStrategy; +import com.facebook.presto.spi.PrestoException; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.List; + +import static com.facebook.presto.block.BlockAssertions.createDoublesBlock; +import static com.facebook.presto.block.BlockAssertions.createLongsBlock; +import static com.facebook.presto.block.BlockAssertions.createStringsBlock; +import static com.facebook.presto.operator.aggregation.AggregationTestUtils.aggregation; + +public class TestFixedHistogramMleAggregation + extends AbstractTestFixedHistogramAggregation +{ + public TestFixedHistogramMleAggregation() + { + super(DifferentialEntropyStateStrategy.FIXED_HISTOGRAM_MLE_METHOD_NAME); + } + + @Test( + expectedExceptions = PrestoException.class, + expectedExceptionsMessageRegExp = "In differential_entropy UDF, bucket count must be 
non-negative: -200") + public void testIllegalBucketCount() + { + aggregation( + getFunction(), + createLongsBlock(-200), + createLongsBlock(1), + createDoublesBlock(0.1), + createDoublesBlock(0.2), + createStringsBlock(method), + createDoublesBlock(0.0), + createDoublesBlock(0.2)); + } + + @Test( + expectedExceptions = PrestoException.class, + expectedExceptionsMessageRegExp = "In differential_entropy UDF, weight must be non-negative: -0.2") + public void testNegativeWeight() + { + aggregation( + getFunction(), + createLongsBlock(200), + createLongsBlock(1), + createDoublesBlock(0.1), + createDoublesBlock(-0.2), + createStringsBlock(method), + createDoublesBlock(0.0), + createDoublesBlock(0.2)); + } + + @Test( + expectedExceptions = PrestoException.class, + expectedExceptionsMessageRegExp = "In differential_entropy UDF, sample must be at least min: sample=-100.0, min=0.0") + public void testTooSmallSample() + { + aggregation( + getFunction(), + createLongsBlock(200), + createLongsBlock(1), + createDoublesBlock(-100.0), + createDoublesBlock(0.2), + createStringsBlock(method), + createDoublesBlock(0.0), + createDoublesBlock(0.2)); + } + + @Test( + expectedExceptions = PrestoException.class, + expectedExceptionsMessageRegExp = "In differential_entropy UDF, sample must be at most max: sample=300.0, max=0.2") + public void testTooLargeSample() + { + aggregation( + getFunction(), + createLongsBlock(200), + createLongsBlock(1), + createDoublesBlock(300.0), + createDoublesBlock(0.2), + createStringsBlock(method), + createDoublesBlock(0.0), + createDoublesBlock(0.2)); + } + + @Test( + expectedExceptions = PrestoException.class, + expectedExceptionsMessageRegExp = "In differential_entropy UDF, min must be larger than max: min=0.2, max=0.1") + public void testIllegalMinMax() + { + aggregation( + getFunction(), + createLongsBlock(200), + createLongsBlock(1), + createDoublesBlock(0.1), + createDoublesBlock(0.2), + createStringsBlock(method), + createDoublesBlock(0.2), + 
createDoublesBlock(0.1)); + } + + @Override + public Double getExpectedValue(int start, int length) + { + List<Integer> outcomes = new ArrayList<>(); + List<Double> samples = new ArrayList<>(); + List<Double> weights = new ArrayList<>(); + generateOutcomesSamplesAndWeights(start, length, outcomes, samples, weights); + List<Double> trueSamples = new ArrayList<>(); + List<Double> trueWeights = new ArrayList<>(); + List<Double> falseSamples = new ArrayList<>(); + List<Double> falseWeights = new ArrayList<>(); + double totalTrueWeight = 0; + double totalFalseWeight = 0; + for (int i = 0; i < samples.size(); i++) { + if (outcomes.get(i) == 1) { + totalTrueWeight += weights.get(i); + trueSamples.add(samples.get(i)); + trueWeights.add(weights.get(i)); + } + else { + totalFalseWeight += weights.get(i); + falseSamples.add(samples.get(i)); + falseWeights.add(weights.get(i)); + } + } + double entropy = calculateEntropy(samples, weights); + double reduced = entropy; + double positive = calculateEntropy(trueSamples, trueWeights); + reduced -= positive * (totalTrueWeight / (totalTrueWeight + totalFalseWeight)); + double negative = calculateEntropy(falseSamples, falseWeights); + reduced -= negative * (totalFalseWeight / (totalTrueWeight + totalFalseWeight)); + double mutualInformation = Math.min(1.0, Math.max(reduced / entropy, 0.0)); + return mutualInformation; + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestIllegalMethodAggregation.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestIllegalMethodAggregation.java new file mode 100644 index 0000000000000..f30ffadf29fbb --- /dev/null +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestIllegalMethodAggregation.java @@ -0,0 +1,71 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the
License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.operator.aggregation.differentialmutualinformationclassification; + +import com.facebook.presto.metadata.FunctionManager; +import com.facebook.presto.metadata.MetadataManager; +import com.facebook.presto.operator.aggregation.InternalAggregationFunction; +import com.facebook.presto.spi.PrestoException; +import org.testng.annotations.Test; + +import static com.facebook.presto.block.BlockAssertions.createDoublesBlock; +import static com.facebook.presto.block.BlockAssertions.createLongsBlock; +import static com.facebook.presto.block.BlockAssertions.createStringsBlock; +import static com.facebook.presto.operator.aggregation.AggregationTestUtils.aggregation; +import static com.facebook.presto.spi.type.BigintType.BIGINT; +import static com.facebook.presto.spi.type.DoubleType.DOUBLE; +import static com.facebook.presto.spi.type.VarcharType.VARCHAR; +import static com.facebook.presto.sql.analyzer.TypeSignatureProvider.fromTypes; + +class TestIllegalMethodAggregation +{ + @Test( + expectedExceptions = PrestoException.class, + expectedExceptionsMessageRegExp = "In differential_entropy UDF, invalid method: no_such_method") + public void testIllegalMethod() + { + FunctionManager functionManager = MetadataManager.createTestMetadataManager().getFunctionManager(); + InternalAggregationFunction function = functionManager.getAggregateFunctionImplementation( + functionManager.lookupFunction( + "differential_entropy", + fromTypes(BIGINT, DOUBLE, DOUBLE, VARCHAR, DOUBLE, DOUBLE))); + aggregation( + function, + 
createLongsBlock(200), + createDoublesBlock(0.1), + createDoublesBlock(0.2), + createStringsBlock("no_such_method"), + createDoublesBlock(0.0), + createDoublesBlock(1.0)); + } + + @Test + public void testNullMethod() + { + FunctionManager functionManager = MetadataManager.createTestMetadataManager().getFunctionManager(); + InternalAggregationFunction function = functionManager.getAggregateFunctionImplementation( + functionManager.lookupFunction( + "differential_entropy", + fromTypes(BIGINT, DOUBLE, DOUBLE, VARCHAR, DOUBLE, DOUBLE))); + createStringsBlock((String) null); + aggregation( + function, + createLongsBlock(200), + createDoublesBlock(0.1), + createDoublesBlock(-0.2), + createStringsBlock((String) null), + createDoublesBlock(0.0), + createDoublesBlock(1.0)); + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestUnweightedReservoirAggregation.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestUnweightedReservoirAggregation.java new file mode 100644 index 0000000000000..5aa7f44903165 --- /dev/null +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestUnweightedReservoirAggregation.java @@ -0,0 +1,106 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.operator.aggregation.differentialmutualinformationclassification; + +import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.block.Block; +import com.facebook.presto.spi.block.BlockBuilder; +import com.facebook.presto.spi.type.StandardTypes; +import com.google.common.collect.ImmutableList; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.List; + +import static com.facebook.presto.block.BlockAssertions.createDoublesBlock; +import static com.facebook.presto.block.BlockAssertions.createLongsBlock; +import static com.facebook.presto.block.BlockAssertions.createRLEBlock; +import static com.facebook.presto.operator.aggregation.AggregationTestUtils.aggregation; +import static com.facebook.presto.operator.aggregation.differentialentropy.EntropyCalculations.calculateFromSamplesUsingVasicek; +import static com.facebook.presto.spi.type.BigintType.BIGINT; +import static com.facebook.presto.spi.type.DoubleType.DOUBLE; +import static org.testng.Assert.assertTrue; + +class TestUnweightedReservoirAggregation + extends AbstractTestReservoirAggregation +{ + @Test( + expectedExceptions = PrestoException.class, + expectedExceptionsMessageRegExp = "In differential_entropy UDF, max samples must be positive: -200") + public void testInvalidMaxSamples() + { + aggregation( + getFunction(), + createLongsBlock(-200), + createLongsBlock(1), + createDoublesBlock(0.1)); + } + + @Override + public Block[] getSequenceBlocks(int start, int length) + { + int positionCount = LENGTH_FACTOR * length; + BlockBuilder outcomes = BIGINT.createBlockBuilder(null, positionCount); + BlockBuilder samples = DOUBLE.createBlockBuilder(null, positionCount); + for (int i = start; i < start + LENGTH_FACTOR * length; i++) { + BIGINT.writeLong(outcomes, i % TRUE_FACTOR == 0 ?
0 : 1); + DOUBLE.writeDouble(samples, i); + } + + return new Block[] { + createRLEBlock(AbstractTestReservoirAggregation.MAX_SAMPLES, positionCount), + outcomes.build(), + samples.build() + }; + } + + @Override + protected List getFunctionParameterTypes() + { + return ImmutableList.of(StandardTypes.INTEGER, StandardTypes.INTEGER, StandardTypes.DOUBLE); + } + + @Override + public Double getExpectedValue(int start, int length) + { + assertTrue(LENGTH_FACTOR * length < MAX_SAMPLES); + List samples = new ArrayList<>(); + List trueSamples = new ArrayList<>(); + List falseSamples = new ArrayList<>(); + double totalTrueWeight = 0; + double totalFalseWeight = 0; + for (int i = start; i < start + LENGTH_FACTOR * length; i++) { + samples.add((double) i); + if (i % TRUE_FACTOR != 0) { + totalTrueWeight += 1; + trueSamples.add((double) i); + } + else { + totalFalseWeight += 1; + falseSamples.add((double) i); + } + } + double entropy = calculateFromSamplesUsingVasicek(samples.stream().mapToDouble(Double::doubleValue).toArray()); + if (entropy == 0) { + return Double.NaN; + } + double trueEntropy = calculateFromSamplesUsingVasicek(trueSamples.stream().mapToDouble(Double::doubleValue).toArray()); + double falseEntropy = calculateFromSamplesUsingVasicek(falseSamples.stream().mapToDouble(Double::doubleValue).toArray()); + double reduced = entropy; + reduced -= trueEntropy * (totalTrueWeight / (totalTrueWeight + totalFalseWeight)); + reduced -= falseEntropy * (totalFalseWeight / (totalTrueWeight + totalFalseWeight)); + double mutualInformation = Math.min(1.0, Math.max(reduced / entropy, 0.0)); + return mutualInformation; + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestUnweightedReservoirSampleStateStrategy.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestUnweightedReservoirSampleStateStrategy.java new file mode 100644 index 
0000000000000..33834816b7deb --- /dev/null +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestUnweightedReservoirSampleStateStrategy.java @@ -0,0 +1,25 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.operator.aggregation.differentialmutualinformationclassification; + +import com.facebook.presto.operator.aggregation.differentialentropy.UnweightedReservoirSampleStateStrategy; + +class TestUnweightedReservoirSampleStateStrategy + extends AbstractTestStateStrategy +{ + public TestUnweightedReservoirSampleStateStrategy() + { + super(size -> new UnweightedReservoirSampleStateStrategy(size), false); + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestWeightedReservoirAggregation.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestWeightedReservoirAggregation.java new file mode 100644 index 0000000000000..49ea96b38de39 --- /dev/null +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/differentialmutualinformationclassification/TestWeightedReservoirAggregation.java @@ -0,0 +1,124 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.operator.aggregation.differentialmutualinformationclassification; + +import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.block.Block; +import com.facebook.presto.spi.block.BlockBuilder; +import com.facebook.presto.spi.type.StandardTypes; +import com.google.common.collect.ImmutableList; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.List; + +import static com.facebook.presto.block.BlockAssertions.createDoublesBlock; +import static com.facebook.presto.block.BlockAssertions.createLongsBlock; +import static com.facebook.presto.block.BlockAssertions.createRLEBlock; +import static com.facebook.presto.operator.aggregation.AggregationTestUtils.aggregation; +import static com.facebook.presto.operator.aggregation.differentialentropy.EntropyCalculations.calculateFromSamplesUsingVasicek; +import static com.facebook.presto.spi.type.BigintType.BIGINT; +import static com.facebook.presto.spi.type.DoubleType.DOUBLE; +import static org.testng.Assert.assertTrue; + +public class TestWeightedReservoirAggregation + extends AbstractTestReservoirAggregation +{ + @Test( + expectedExceptions = PrestoException.class, + expectedExceptionsMessageRegExp = "In differential_entropy UDF, max samples must be positive: -200") + public void testInvalidMaxSamples() + { + aggregation( + getFunction(), + createLongsBlock(-200), + createLongsBlock(1), + createDoublesBlock(0.1), + createDoublesBlock(0.2)); + } + + @Test( + expectedExceptions = PrestoException.class, + 
expectedExceptionsMessageRegExp = "In differential_entropy UDF, weight must be non-negative: -0.2") + public void testNegativeWeight() + { + aggregation( + getFunction(), + createLongsBlock(200), + createLongsBlock(1), + createDoublesBlock(0.1), + createDoublesBlock(-0.2)); + } + + @Override + public Block[] getSequenceBlocks(int start, int length) + { + int positionCount = LENGTH_FACTOR * length; + BlockBuilder outcomes = BIGINT.createBlockBuilder(null, positionCount); + BlockBuilder samples = DOUBLE.createBlockBuilder(null, positionCount); + BlockBuilder weights = DOUBLE.createBlockBuilder(null, positionCount); + for (int i = start; i < start + LENGTH_FACTOR * length; i++) { + BIGINT.writeLong(outcomes, i % TRUE_FACTOR == 0 ? 0 : 1); + DOUBLE.writeDouble(samples, i); + DOUBLE.writeDouble(weights, Math.abs(i) % 3 + 1); + } + + return new Block[] { + createRLEBlock(AbstractTestReservoirAggregation.MAX_SAMPLES, positionCount), + outcomes.build(), + samples.build(), + weights.build() + }; + } + + @Override + protected List getFunctionParameterTypes() + { + return ImmutableList.of(StandardTypes.INTEGER, StandardTypes.INTEGER, StandardTypes.DOUBLE, StandardTypes.DOUBLE); + } + + @Override + public Double getExpectedValue(int start, int length) + { + assertTrue(LENGTH_FACTOR * length < MAX_SAMPLES); + List samples = new ArrayList<>(); + List trueSamples = new ArrayList<>(); + List falseSamples = new ArrayList<>(); + double totalTrueWeight = 0; + double totalFalseWeight = 0; + for (int i = start; i < start + LENGTH_FACTOR * length; i++) { + double weight = (i % 3) + 1; + samples.add((double) i); + if (i % TRUE_FACTOR != 0) { + totalTrueWeight += weight; + trueSamples.add((double) i); + } + else { + totalFalseWeight += weight; + falseSamples.add((double) i); + } + } + double entropy = calculateFromSamplesUsingVasicek(samples.stream().mapToDouble(Double::doubleValue).toArray()); + if (entropy == 0) { + return Double.NaN; + } + double trueEntropy = 
calculateFromSamplesUsingVasicek(trueSamples.stream().mapToDouble(Double::doubleValue).toArray()); + double falseEntropy = calculateFromSamplesUsingVasicek(falseSamples.stream().mapToDouble(Double::doubleValue).toArray()); + double reduced = entropy; + reduced -= trueEntropy * (totalTrueWeight / (totalTrueWeight + totalFalseWeight)); + reduced -= falseEntropy * (totalFalseWeight / (totalTrueWeight + totalFalseWeight)); + double mutualInformation = Math.min(1.0, Math.max(reduced / entropy, 0.0)); + return mutualInformation; + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/AbstractTestStateStrategy.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/AbstractTestStateStrategy.java new file mode 100644 index 0000000000000..8d0db21b88f04 --- /dev/null +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/AbstractTestStateStrategy.java @@ -0,0 +1,52 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +import org.testng.annotations.Test; + +import java.util.Random; +import java.util.function.Supplier; + +import static org.testng.Assert.assertEquals; + +public class AbstractTestStateStrategy +{ + Supplier strategySupplier; + boolean weighted; + + protected AbstractTestStateStrategy(Supplier strategySupplier, boolean weighted) + { + this.strategySupplier = strategySupplier; + this.weighted = weighted; + } + + @Test + public void testUniformDistribution() + { + int size = 10; + DiscreteEntropyStateStrategy strategy = strategySupplier.get(); + Random random = new Random(13); + for (int i = 0; i < 9_999_999; i++) { + int value = random.nextInt(size); + if (weighted) { + strategy.add(value, random.nextInt(10)); + } + else { + strategy.add(value); + } + } + double expected = Math.log(10) / Math.log(2); + assertEquals(strategy.calculateEntropy(), expected, 0.1); + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestBooleanExplicitJacknifeAggregation.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestBooleanExplicitJacknifeAggregation.java new file mode 100644 index 0000000000000..29ea176750a07 --- /dev/null +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestBooleanExplicitJacknifeAggregation.java @@ -0,0 +1,129 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +import com.facebook.presto.metadata.FunctionManager; +import com.facebook.presto.metadata.MetadataManager; +import com.facebook.presto.operator.aggregation.AbstractTestAggregationFunction; +import com.facebook.presto.operator.aggregation.InternalAggregationFunction; +import com.facebook.presto.spi.block.Block; +import com.facebook.presto.spi.block.BlockBuilder; +import com.facebook.presto.spi.type.StandardTypes; +import com.google.common.collect.ImmutableList; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.util.List; + +import static com.facebook.presto.block.BlockAssertions.createBooleansBlock; +import static com.facebook.presto.block.BlockAssertions.createRLEBlock; +import static com.facebook.presto.block.BlockAssertions.createStringsBlock; +import static com.facebook.presto.operator.aggregation.AggregationTestUtils.assertAggregation; +import static com.facebook.presto.operator.aggregation.discreteentropy.DiscreteEntropyStateStrategy.JACKNIFE_METHOD_NAME; +import static com.facebook.presto.spi.type.BooleanType.BOOLEAN; +import static com.facebook.presto.spi.type.VarcharType.VARCHAR; +import static com.facebook.presto.sql.analyzer.TypeSignatureProvider.fromTypes; + +public class TestBooleanExplicitJacknifeAggregation + extends AbstractTestAggregationFunction +{ + private static final String FUNCTION_NAME = "discrete_entropy"; + + private InternalAggregationFunction entropyFunction; + + @BeforeClass + public void setUp() + { + FunctionManager functionManager = MetadataManager.createTestMetadataManager().getFunctionManager(); + entropyFunction = functionManager.getAggregateFunctionImplementation( + functionManager.lookupFunction(TestBooleanExplicitJacknifeAggregation.FUNCTION_NAME, fromTypes(BOOLEAN, VARCHAR))); + } + + @Test + public void 
testEntropyOfASingle() + { + assertAggregation(entropyFunction, + 0.0, + createBooleansBlock(Boolean.FALSE), + createStringsBlock(JACKNIFE_METHOD_NAME)); + } + + @Test + public void testEntropyOfTwoDistinct() + { + assertAggregation(entropyFunction, + 2.0, + createBooleansBlock(Boolean.FALSE, Boolean.TRUE), + createStringsBlock(JACKNIFE_METHOD_NAME, JACKNIFE_METHOD_NAME)); + + assertAggregation(entropyFunction, + 2.0, + createBooleansBlock(null, Boolean.FALSE, null, Boolean.TRUE), + createStringsBlock(null, JACKNIFE_METHOD_NAME, JACKNIFE_METHOD_NAME, JACKNIFE_METHOD_NAME)); + } + + @Test + public void testEntropyOfOnlyNulls() + { + assertAggregation(entropyFunction, + 0.0, + createBooleansBlock(null, null, null), + createStringsBlock(JACKNIFE_METHOD_NAME, JACKNIFE_METHOD_NAME, null)); + } + + @Override + public Block[] getSequenceBlocks(int start, int length) + { + BlockBuilder samples = BOOLEAN.createBlockBuilder(null, length); + for (int i = start; i < start + length; i++) { + BOOLEAN.writeBoolean(samples, Math.abs(i) % 2 == 0); + } + return new Block[] { + samples.build(), + createRLEBlock(JACKNIFE_METHOD_NAME, length), + }; + } + + @Override + public Number getExpectedValue(int start, int length) + { + int[] counts = {0, 0}; + for (int i = start; i < start + length; i++) { + ++counts[Math.abs(i) % 2]; + } + double entropy = length * EntropyCalculations.calculateEntropy(counts); + for (int j = start; j < start + length; j++) { + int[] holdouts = {0, 0}; + for (int i = start; i < start + length; i++) { + if (j != i) { + ++holdouts[Math.abs(i) % 2]; + } + } + entropy -= (length - 1) * EntropyCalculations.calculateEntropy(holdouts) / length; + } + return entropy; + } + + @Override + protected String getFunctionName() + { + return FUNCTION_NAME; + } + + @Override + protected List getFunctionParameterTypes() + { + return ImmutableList.of(StandardTypes.BOOLEAN, StandardTypes.VARCHAR); + } +} diff --git 
a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestDoubleWeightAggregation.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestDoubleWeightAggregation.java new file mode 100644 index 0000000000000..d1b10e755e204 --- /dev/null +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestDoubleWeightAggregation.java @@ -0,0 +1,138 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +import com.facebook.presto.metadata.FunctionManager; +import com.facebook.presto.metadata.MetadataManager; +import com.facebook.presto.operator.aggregation.AbstractTestAggregationFunction; +import com.facebook.presto.operator.aggregation.InternalAggregationFunction; +import com.facebook.presto.spi.block.Block; +import com.facebook.presto.spi.block.BlockBuilder; +import com.facebook.presto.spi.type.StandardTypes; +import com.google.common.collect.ImmutableList; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.util.List; + +import static com.facebook.presto.block.BlockAssertions.createDoublesBlock; +import static com.facebook.presto.operator.aggregation.AggregationTestUtils.assertAggregation; +import static com.facebook.presto.spi.type.DoubleType.DOUBLE; +import static com.facebook.presto.sql.analyzer.TypeSignatureProvider.fromTypes; + +public class TestDoubleWeightAggregation + extends AbstractTestAggregationFunction +{ + private static final String FUNCTION_NAME = "discrete_entropy"; + + private InternalAggregationFunction entropyFunction; + + @BeforeClass + public void setUp() + { + FunctionManager functionManager = MetadataManager.createTestMetadataManager().getFunctionManager(); + entropyFunction = functionManager.getAggregateFunctionImplementation( + functionManager.lookupFunction(TestDoubleWeightAggregation.FUNCTION_NAME, fromTypes(DOUBLE, DOUBLE))); + } + + @Test + public void testEntropyOfASingle() + { + assertAggregation(entropyFunction, + 0.0, + createDoublesBlock(Double.valueOf(1.0)), + createDoublesBlock(Double.valueOf(1.0))); + } + + @Test + public void testEntropyOfTwoDistinct() + { + assertAggregation(entropyFunction, + 1.0, + createDoublesBlock(1.0, 2.0), + createDoublesBlock(1.0, 1.0)); + + assertAggregation(entropyFunction, + 1.0, + createDoublesBlock(1.0, null, 2.0, null), + createDoublesBlock(1.0, 1.0, 1.0, null)); + } + + 
@Test + public void testEntropyOfSkewedTwoDistinct() + { + assertAggregation(entropyFunction, + 0.9182958340544894, + createDoublesBlock(1.0, 2.0), + createDoublesBlock(1.0, 2.0)); + + assertAggregation(entropyFunction, + 0.9182958340544894, + createDoublesBlock(1.0, 2.0, 2.0), + createDoublesBlock(2.0, 2.0, 2.0)); + + assertAggregation(entropyFunction, + 0.9182958340544894, + createDoublesBlock(null, 1.0, 2.0, 2.0), + createDoublesBlock(null, 2.0, 2.0, 2.0)); + } + + @Test + public void testEntropyOfOnlyNulls() + { + assertAggregation(entropyFunction, + 0.0, + createDoublesBlock(null, null), + createDoublesBlock(null, null)); + } + + @Override + public Block[] getSequenceBlocks(int start, int length) + { + BlockBuilder samples = DOUBLE.createBlockBuilder(null, length); + BlockBuilder weights = DOUBLE.createBlockBuilder(null, length); + for (int i = start; i < start + length; i++) { + double current = Math.abs(i) % 2; + DOUBLE.writeDouble(samples, current); + DOUBLE.writeDouble(weights, current); + } + return new Block[] { + samples.build(), + weights.build()}; + } + + @Override + public Number getExpectedValue(int start, int length) + { + double[] weights = {0.0, 0.0}; + for (int i = start; i < start + length; i++) { + weights[Math.abs(i) % 2] += (double) (Math.abs(i) % 2); + } + return EntropyCalculations.calculateEntropy(weights); + } + + @Override + protected String getFunctionName() + { + return FUNCTION_NAME; + } + + @Override + protected List getFunctionParameterTypes() + { + return ImmutableList.of( + StandardTypes.DOUBLE, + StandardTypes.DOUBLE); + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestIntAggregation.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestIntAggregation.java new file mode 100644 index 0000000000000..a8d166975a082 --- /dev/null +++ 
b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestIntAggregation.java @@ -0,0 +1,121 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +import com.facebook.presto.metadata.FunctionManager; +import com.facebook.presto.metadata.MetadataManager; +import com.facebook.presto.operator.aggregation.AbstractTestAggregationFunction; +import com.facebook.presto.operator.aggregation.InternalAggregationFunction; +import com.facebook.presto.spi.block.Block; +import com.facebook.presto.spi.block.BlockBuilder; +import com.facebook.presto.spi.type.StandardTypes; +import com.google.common.collect.ImmutableList; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.util.List; + +import static com.facebook.presto.block.BlockAssertions.createLongsBlock; +import static com.facebook.presto.operator.aggregation.AggregationTestUtils.assertAggregation; +import static com.facebook.presto.spi.type.BigintType.BIGINT; +import static com.facebook.presto.spi.type.DoubleType.DOUBLE; +import static com.facebook.presto.sql.analyzer.TypeSignatureProvider.fromTypes; + +public class TestIntAggregation + extends AbstractTestAggregationFunction +{ + private static final String FUNCTION_NAME = "discrete_entropy"; + + private InternalAggregationFunction entropyFunction; + + @BeforeClass + public void setUp() + { + FunctionManager functionManager 
= MetadataManager.createTestMetadataManager().getFunctionManager(); + entropyFunction = functionManager.getAggregateFunctionImplementation( + functionManager.lookupFunction(TestIntAggregation.FUNCTION_NAME, fromTypes(DOUBLE))); + } + + @Test + public void testEntropyOfASingle() + { + assertAggregation(entropyFunction, + 0.0, + createLongsBlock(Long.valueOf(1))); + } + + @Test + public void testEntropyOfTwoDistinct() + { + assertAggregation(entropyFunction, + 1.0, + createLongsBlock(1, 2)); + + assertAggregation(entropyFunction, + 1.0, + createLongsBlock(null, 1L, null, 2L)); + } + + @Test + public void testEntropyOfSkewedTwoDistinct() + { + assertAggregation(entropyFunction, + 0.9182958340544894, + createLongsBlock(1, 1, 2)); + + assertAggregation(entropyFunction, + 0.9182958340544894, + createLongsBlock(null, 1L, null, 1L, 2L)); + } + + @Test + public void testEntropyOfOnlyNulls() + { + assertAggregation(entropyFunction, + 0.0, + createLongsBlock(null, null)); + } + + @Override + public Block[] getSequenceBlocks(int start, int length) + { + BlockBuilder samples = BIGINT.createBlockBuilder(null, length); + for (int i = start; i < start + length; i++) { + BIGINT.writeLong(samples, Math.abs(i) % 2); + } + return new Block[] {samples.build()}; + } + + @Override + public Number getExpectedValue(int start, int length) + { + int[] counts = {0, 0}; + for (int i = start; i < start + length; i++) { + ++counts[Math.abs(i) % 2]; + } + return EntropyCalculations.calculateEntropy(counts); + } + + @Override + protected String getFunctionName() + { + return FUNCTION_NAME; + } + + @Override + protected List getFunctionParameterTypes() + { + return ImmutableList.of(StandardTypes.BIGINT); + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestUnweightedJacknifeStateStrategy.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestUnweightedJacknifeStateStrategy.java new file mode 100644 index 
0000000000000..c4c8477c4c496 --- /dev/null +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestUnweightedJacknifeStateStrategy.java @@ -0,0 +1,23 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +public class TestUnweightedJacknifeStateStrategy + extends AbstractTestStateStrategy +{ + public TestUnweightedJacknifeStateStrategy() + { + super(() -> new UnweightedJacknifeStateStrategy(), false); + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestUnweightedMleStateStrategy.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestUnweightedMleStateStrategy.java new file mode 100644 index 0000000000000..d6a408b0de905 --- /dev/null +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestUnweightedMleStateStrategy.java @@ -0,0 +1,23 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +public class TestUnweightedMleStateStrategy + extends AbstractTestStateStrategy +{ + public TestUnweightedMleStateStrategy() + { + super(() -> new UnweightedMleStateStrategy(), false); + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestVarcharExplicitMleAggregation.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestVarcharExplicitMleAggregation.java new file mode 100644 index 0000000000000..714991006ec57 --- /dev/null +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestVarcharExplicitMleAggregation.java @@ -0,0 +1,131 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +import com.facebook.presto.metadata.FunctionManager; +import com.facebook.presto.metadata.MetadataManager; +import com.facebook.presto.operator.aggregation.AbstractTestAggregationFunction; +import com.facebook.presto.operator.aggregation.InternalAggregationFunction; +import com.facebook.presto.spi.block.Block; +import com.facebook.presto.spi.block.BlockBuilder; +import com.facebook.presto.spi.type.StandardTypes; +import com.google.common.collect.ImmutableList; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.util.List; + +import static com.facebook.presto.block.BlockAssertions.createRLEBlock; +import static com.facebook.presto.block.BlockAssertions.createStringsBlock; +import static com.facebook.presto.operator.aggregation.AggregationTestUtils.assertAggregation; +import static com.facebook.presto.operator.aggregation.discreteentropy.DiscreteEntropyStateStrategy.MLE_METHOD_NAME; +import static com.facebook.presto.spi.type.VarcharType.VARCHAR; +import static com.facebook.presto.sql.analyzer.TypeSignatureProvider.fromTypes; + +public class TestVarcharExplicitMleAggregation + extends AbstractTestAggregationFunction +{ + private static final String FUNCTION_NAME = "discrete_entropy"; + + private InternalAggregationFunction entropyFunction; + + @BeforeClass + public void setUp() + { + FunctionManager functionManager = MetadataManager.createTestMetadataManager().getFunctionManager(); + entropyFunction = functionManager.getAggregateFunctionImplementation( + functionManager.lookupFunction(TestVarcharExplicitMleAggregation.FUNCTION_NAME, fromTypes(VARCHAR, VARCHAR))); + } + + @Test + public void testEntropyOfASingle() + { + assertAggregation(entropyFunction, + 0.0, + createStringsBlock(new String("false")), + createStringsBlock(MLE_METHOD_NAME)); + } + + @Test + public void testEntropyOfTwoDistinct() + { + assertAggregation(entropyFunction, + 1.0, + 
createStringsBlock("false", "true"), + createStringsBlock(MLE_METHOD_NAME, MLE_METHOD_NAME)); + + assertAggregation(entropyFunction, + 1.0, + createStringsBlock("false", "true", "true"), + createStringsBlock(MLE_METHOD_NAME, MLE_METHOD_NAME, null)); + } + + @Test + public void testEntropyOfSkewedTwoDistinct() + { + assertAggregation(entropyFunction, + 0.9182958340544894, + createStringsBlock("false", "false", "true"), + createStringsBlock(MLE_METHOD_NAME, MLE_METHOD_NAME, MLE_METHOD_NAME)); + + assertAggregation(entropyFunction, + 0.9182958340544894, + createStringsBlock("false", "false", "true", "true"), + createStringsBlock(MLE_METHOD_NAME, MLE_METHOD_NAME, MLE_METHOD_NAME, null)); + } + + @Test + public void testEntropyOfOnlyNulls() + { + assertAggregation(entropyFunction, + 0.0, + createStringsBlock(null, null, null), + createStringsBlock(MLE_METHOD_NAME, MLE_METHOD_NAME, null)); + } + + @Override + public Block[] getSequenceBlocks(int start, int length) + { + BlockBuilder samples = VARCHAR.createBlockBuilder(null, length); + for (int i = start; i < start + length; i++) { + VARCHAR.writeString(samples, Integer.toString(Math.abs(i) % 2)); + } + return new Block[] { + samples.build(), + createRLEBlock(MLE_METHOD_NAME, length), + }; + } + + @Override + public Number getExpectedValue(int start, int length) + { + int[] counts = {0, 0}; + for (int i = start; i < start + length; i++) { + ++counts[Math.abs(i) % 2]; + } + return EntropyCalculations.calculateEntropy(counts); + } + + @Override + protected String getFunctionName() + { + return FUNCTION_NAME; + } + + @Override + protected List getFunctionParameterTypes() + { + return ImmutableList.of(StandardTypes.VARCHAR, StandardTypes.VARCHAR); + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestVarcharWeightExplicitJacknifeAggregation.java 
b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestVarcharWeightExplicitJacknifeAggregation.java new file mode 100644 index 0000000000000..42620e341cd2a --- /dev/null +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestVarcharWeightExplicitJacknifeAggregation.java @@ -0,0 +1,143 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +import com.facebook.presto.metadata.FunctionManager; +import com.facebook.presto.metadata.MetadataManager; +import com.facebook.presto.operator.aggregation.AbstractTestAggregationFunction; +import com.facebook.presto.operator.aggregation.InternalAggregationFunction; +import com.facebook.presto.spi.block.Block; +import com.facebook.presto.spi.block.BlockBuilder; +import com.facebook.presto.spi.type.StandardTypes; +import com.google.common.collect.ImmutableList; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.util.List; + +import static com.facebook.presto.block.BlockAssertions.createDoublesBlock; +import static com.facebook.presto.block.BlockAssertions.createRLEBlock; +import static com.facebook.presto.block.BlockAssertions.createStringsBlock; +import static com.facebook.presto.operator.aggregation.AggregationTestUtils.assertAggregation; +import static 
com.facebook.presto.operator.aggregation.discreteentropy.DiscreteEntropyStateStrategy.JACKNIFE_METHOD_NAME; +import static com.facebook.presto.spi.type.DoubleType.DOUBLE; +import static com.facebook.presto.spi.type.VarcharType.VARCHAR; +import static com.facebook.presto.sql.analyzer.TypeSignatureProvider.fromTypes; + +public class TestVarcharWeightExplicitJacknifeAggregation + extends AbstractTestAggregationFunction +{ + private static final String FUNCTION_NAME = "discrete_entropy"; + + private InternalAggregationFunction entropyFunction; + + @BeforeClass + public void setUp() + { + FunctionManager functionManager = MetadataManager.createTestMetadataManager().getFunctionManager(); + entropyFunction = functionManager.getAggregateFunctionImplementation( + functionManager.lookupFunction(TestVarcharWeightExplicitJacknifeAggregation.FUNCTION_NAME, fromTypes(VARCHAR, DOUBLE, VARCHAR))); + } + + @Test + public void testEntropyOfASingle() + { + assertAggregation(entropyFunction, + 0.0, + createStringsBlock("false"), + createDoublesBlock(10.0), + createStringsBlock(JACKNIFE_METHOD_NAME)); + } + + @Test + public void testEntropyOfTwoDistinct() + { + assertAggregation(entropyFunction, + 2.0, + createStringsBlock("false", "true"), + createDoublesBlock(10.0, 10.0), + createStringsBlock(JACKNIFE_METHOD_NAME, JACKNIFE_METHOD_NAME)); + + assertAggregation(entropyFunction, + 2.0, + createStringsBlock("false", "true", "true"), + createDoublesBlock(10.0, 10.0, null), + createStringsBlock(JACKNIFE_METHOD_NAME, JACKNIFE_METHOD_NAME, null)); + } + + @Test + public void testEntropyOfOnlyNulls() + { + assertAggregation(entropyFunction, + 0.0, + createStringsBlock(null, null, null), + createDoublesBlock(20.0, 10.0, null), + createStringsBlock(JACKNIFE_METHOD_NAME, JACKNIFE_METHOD_NAME, null)); + } + + @Override + public Block[] getSequenceBlocks(int start, int length) + { + BlockBuilder samples = VARCHAR.createBlockBuilder(null, length); + BlockBuilder weights = 
DOUBLE.createBlockBuilder(null, length); + for (int i = start; i < start + length; i++) { + double current = Math.abs(i) % 2; + VARCHAR.writeString(samples, Integer.toString((int) current)); + DOUBLE.writeDouble(weights, current); + } + return new Block[] { + samples.build(), + weights.build(), + createRLEBlock(JACKNIFE_METHOD_NAME, length), + }; + } + + @Override + public Number getExpectedValue(int start, int length) + { + int[] counts = {0, 0}; + double[] weights = {0.0, 0.0}; + for (int i = start; i < start + length; i++) { + int current = Math.abs(i) % 2; + ++counts[current]; + weights[current] += current; + } + double entropy = length * EntropyCalculations.calculateEntropy(weights, counts); + for (int j = start; j < start + length; j++) { + int[] holdoutCounts = {0, 0}; + double[] holdoutWeights = {0.0, 0.0}; + for (int i = start; i < start + length; i++) { + if (j != i) { + int current = Math.abs(i) % 2; + ++holdoutCounts[current]; + holdoutWeights[current] += current; + } + } + entropy -= (length - 1) * EntropyCalculations.calculateEntropy(holdoutWeights, holdoutCounts) / length; + } + return entropy; + } + + @Override + protected String getFunctionName() + { + return FUNCTION_NAME; + } + + @Override + protected List getFunctionParameterTypes() + { + return ImmutableList.of(StandardTypes.VARCHAR, StandardTypes.DOUBLE, StandardTypes.VARCHAR); + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestVarcharWeightExplicitMleAggregation.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestVarcharWeightExplicitMleAggregation.java new file mode 100644 index 0000000000000..b574ec7041703 --- /dev/null +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestVarcharWeightExplicitMleAggregation.java @@ -0,0 +1,143 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with 
the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +import com.facebook.presto.metadata.FunctionManager; +import com.facebook.presto.metadata.MetadataManager; +import com.facebook.presto.operator.aggregation.AbstractTestAggregationFunction; +import com.facebook.presto.operator.aggregation.InternalAggregationFunction; +import com.facebook.presto.spi.block.Block; +import com.facebook.presto.spi.block.BlockBuilder; +import com.facebook.presto.spi.type.StandardTypes; +import com.google.common.collect.ImmutableList; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import java.util.List; + +import static com.facebook.presto.block.BlockAssertions.createDoublesBlock; +import static com.facebook.presto.block.BlockAssertions.createRLEBlock; +import static com.facebook.presto.block.BlockAssertions.createStringsBlock; +import static com.facebook.presto.operator.aggregation.AggregationTestUtils.assertAggregation; +import static com.facebook.presto.operator.aggregation.discreteentropy.DiscreteEntropyStateStrategy.MLE_METHOD_NAME; +import static com.facebook.presto.spi.type.DoubleType.DOUBLE; +import static com.facebook.presto.spi.type.VarcharType.VARCHAR; +import static com.facebook.presto.sql.analyzer.TypeSignatureProvider.fromTypes; + +public class TestVarcharWeightExplicitMleAggregation + extends AbstractTestAggregationFunction +{ + private static final String FUNCTION_NAME = "discrete_entropy"; + + private InternalAggregationFunction entropyFunction; + + @BeforeClass + public void setUp() 
+ { + FunctionManager functionManager = MetadataManager.createTestMetadataManager().getFunctionManager(); + entropyFunction = functionManager.getAggregateFunctionImplementation( + functionManager.lookupFunction(TestVarcharWeightExplicitMleAggregation.FUNCTION_NAME, fromTypes(VARCHAR, DOUBLE, VARCHAR))); + } + + @Test + public void testEntropyOfASingle() + { + assertAggregation(entropyFunction, + 0.0, + createStringsBlock("false"), + createDoublesBlock(10.0), + createStringsBlock(MLE_METHOD_NAME)); + } + + @Test + public void testEntropyOfTwoDistinct() + { + assertAggregation(entropyFunction, + 1.0, + createStringsBlock("false", "true"), + createDoublesBlock(10.0, 10.0), + createStringsBlock(MLE_METHOD_NAME, MLE_METHOD_NAME)); + + assertAggregation(entropyFunction, + 1.0, + createStringsBlock("false", "true", "true"), + createDoublesBlock(10.0, 10.0, null), + createStringsBlock(MLE_METHOD_NAME, MLE_METHOD_NAME, null)); + } + + @Test + public void testEntropyOfSkewedTwoDistinct() + { + assertAggregation(entropyFunction, + 0.9182958340544894, + createStringsBlock("false", "true"), + createDoublesBlock(20.0, 10.0), + createStringsBlock(MLE_METHOD_NAME, MLE_METHOD_NAME)); + + assertAggregation(entropyFunction, + 0.9182958340544894, + createStringsBlock("false", "true", null), + createDoublesBlock(20.0, 10.0, null), + createStringsBlock(MLE_METHOD_NAME, MLE_METHOD_NAME, null)); + } + + @Test + public void testEntropyOfOnlyNulls() + { + assertAggregation(entropyFunction, + 0.0, + createStringsBlock(null, null, null), + createDoublesBlock(20.0, 10.0, null), + createStringsBlock(MLE_METHOD_NAME, MLE_METHOD_NAME, null)); + } + + @Override + public Block[] getSequenceBlocks(int start, int length) + { + BlockBuilder samples = VARCHAR.createBlockBuilder(null, length); + BlockBuilder weights = DOUBLE.createBlockBuilder(null, length); + for (int i = start; i < start + length; i++) { + double current = Math.abs(i) % 2; + VARCHAR.writeString(samples, Integer.toString((int) 
current)); + DOUBLE.writeDouble(weights, current); + } + return new Block[] { + samples.build(), + weights.build(), + createRLEBlock(MLE_METHOD_NAME, length), + }; + } + + @Override + public Number getExpectedValue(int start, int length) + { + double[] weights = {0.0, 0.0}; + for (int i = start; i < start + length; i++) { + weights[Math.abs(i) % 2] += (double) (Math.abs(i) % 2); + } + return EntropyCalculations.calculateEntropy(weights); + } + + @Override + protected String getFunctionName() + { + return FUNCTION_NAME; + } + + @Override + protected List getFunctionParameterTypes() + { + return ImmutableList.of(StandardTypes.VARCHAR, StandardTypes.DOUBLE, StandardTypes.VARCHAR); + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestWeightedJacknifeStateStrategy.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestWeightedJacknifeStateStrategy.java new file mode 100644 index 0000000000000..c4806e73987e7 --- /dev/null +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestWeightedJacknifeStateStrategy.java @@ -0,0 +1,23 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +public class TestWeightedJacknifeStateStrategy + extends AbstractTestStateStrategy +{ + public TestWeightedJacknifeStateStrategy() + { + super(() -> new WeightedJacknifeStateStrategy(), true); + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestWeightedMleStateStrategy.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestWeightedMleStateStrategy.java new file mode 100644 index 0000000000000..91266ae41c2cf --- /dev/null +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/discreteentropy/TestWeightedMleStateStrategy.java @@ -0,0 +1,23 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.operator.aggregation.discreteentropy; + +public class TestWeightedMleStateStrategy + extends AbstractTestStateStrategy +{ + public TestWeightedMleStateStrategy() + { + super(() -> new WeightedMleStateStrategy(), true); + } +} diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/reservoirsample/TestUnweightedDoubleReservoirSample.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/reservoirsample/TestUnweightedDoubleReservoirSample.java index 645556564faa3..311c85053fad2 100644 --- a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/reservoirsample/TestUnweightedDoubleReservoirSample.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/reservoirsample/TestUnweightedDoubleReservoirSample.java @@ -34,9 +34,10 @@ public void testIllegalMaxSamples() @Test public void testGetMaxSamples() { - UnweightedDoubleReservoirSample sample = new UnweightedDoubleReservoirSample(200); + UnweightedDoubleReservoirSample reservoir = new UnweightedDoubleReservoirSample(200); - assertEquals(sample.getMaxSamples(), 200); + assertEquals(reservoir.getMaxSamples(), 200); + assertEquals(reservoir.getTotalPopulationCount(), 0); } @Test @@ -49,6 +50,7 @@ public void testFew() reservoir.add(3.0); assertEquals(Arrays.stream(reservoir.getSamples()).sorted().toArray(), new double[] {1.0, 2.0, 3.0}); + assertEquals(reservoir.getTotalPopulationCount(), 3); } @Test @@ -58,6 +60,7 @@ public void testMany() long streamLength = 1_000_000; for (int i = 0; i < streamLength; ++i) { + assertEquals(reservoir.getTotalPopulationCount(), i); reservoir.add(i); } diff --git a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/reservoirsample/TestWeightedDoubleReservoirSample.java b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/reservoirsample/TestWeightedDoubleReservoirSample.java index 3fe2df8d1e7a4..ed432a1391014 100644 --- 
a/presto-main/src/test/java/com/facebook/presto/operator/aggregation/reservoirsample/TestWeightedDoubleReservoirSample.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/aggregation/reservoirsample/TestWeightedDoubleReservoirSample.java @@ -34,9 +34,10 @@ public void testIllegalMaxSamples() @Test public void testGetters() { - WeightedDoubleReservoirSample sample = new WeightedDoubleReservoirSample(200); + WeightedDoubleReservoirSample reservoir = new WeightedDoubleReservoirSample(200); - assertEquals(sample.getMaxSamples(), 200); + assertEquals(reservoir.getMaxSamples(), 200); + assertEquals(reservoir.getTotalPopulationWeight(), 0.0); } @Test @@ -49,6 +50,7 @@ public void testFew() reservoir.add(3.0, 0.5); assertEquals(Arrays.stream(reservoir.getSamples()).sorted().toArray(), new double[] {1.0, 2.0, 3.0}); + assertEquals(reservoir.getTotalPopulationWeight(), 2.5); } @Test @@ -58,6 +60,7 @@ public void testMany() long streamLength = 1_000_000; for (int i = 0; i < streamLength; ++i) { + assertEquals(reservoir.getTotalPopulationWeight(), (double) i); reservoir.add(i, 1.0); } @@ -81,8 +84,10 @@ public void testManyWeighted() WeightedDoubleReservoirSample reservoir = new WeightedDoubleReservoirSample(200); long streamLength = 1_000_000; + double epsilon = 0.00000001; for (int i = 0; i < streamLength; ++i) { - reservoir.add(3, 0.00000001); + assertEquals(reservoir.getTotalPopulationWeight(), epsilon * i, epsilon / 100); + reservoir.add(3, epsilon); } for (int i = 0; i < streamLength; ++i) { reservoir.add(i, 9999999999.0);