aaronshan · szdforward · Jul 23, 2018 · Jul 24, 2018 · Jul 25, 2018 · Jul 26, 2018
diff --git a/.travis.yml b/.travis.yml
@@ -17,7 +17,5 @@ install:
   - mvn install:install-file -DgroupId=javax.jdo -DartifactId=jdo2-api -Dversion=2.3-ec -Dpackaging=jar -Dfile=$HOME/jdo2-api-2.3-ec.jar
 
 script:
-  - jdk_switcher use openjdk7
-  - mvn clean package
   - jdk_switcher use oraclejdk8
   - mvn clean package
diff --git a/README-zh.md b/README-zh.md
@@ -10,7 +10,8 @@
 hive-third-functions 包含了一些很有用的hive udf函数，特别是数组和json函数.
 
 > 注意:
-> hive-third-functions支持hive-0.11.0或更高版本.
+> 1. hive-third-functions支持hive-0.11.0或更高版本.
+> 2. 运行`3.0.0`及以上版本需要Java8及以上
 
 ## 编译
 
@@ -40,7 +41,7 @@ mvn clean package -DskipTests
 
 你也可以直接在发布页下载打包好了最新版本 [发布页](https://github.com/aaronshan/hive-third-functions/releases).
 
-> 当前最新的版本是 `2.1.3`
+> 当前最新的版本是 `3.0.0`
 
 ## 函数
 
@@ -71,6 +72,11 @@ mvn clean package -DskipTests
 |array_value_count(array&lt;E&gt;, E) -> long | 统计数组中包含给定元素的个数.|
 |array_slice(array, start, length) -> array | 对数组进行分片操作，start为正数从前开始分片, start为负数从后开始分片, 长度为指定的长度.|
 |array_element_at(array&lt;E&gt;, index) -> E | 返回指定位置的数组元素. 如果索引位置 < 0, 则从尾部开始计数并返回.|
+|array_filter(array&lt;E&gt;, function&lt;E, boolean&gt;) -> array&lt;E&gt; | 根据一个返回值为boolean类型的lambda表达式函数来对数组元素进行过滤.|
+|array_shuffle(array) -> array | 对数组shuffle.|
+|sequence(start, end) -> array&lt;Long&gt; | 生成从start到end的整数序列.|
+|sequence(start, end, step) -> array&lt;Long&gt; | 生成从start到end、步长为step的整数序列.|
+|sequence(start_date_string, end_date_string, step) -> array&lt;String&gt; | 生成从start到end的日期字符串序列, 步长step以毫秒为单位.|
 
 ### 3. map函数
 | 函数| 描述 |
@@ -145,6 +151,20 @@ mvn clean package -DskipTests
 |url_encode(value) -> string | escapes value by encoding it so that it can be safely included in URL query parameter names and values|
 |url_decode(value) -> string | unescape the URL encoded value. This function is the inverse of `url_encode`. | 
 
+### 10. 数学函数
+
+| 函数| 描述 |
+|:--|:--|
+|infinity() -> double | 获取正无穷常数|
+|is_finite(x) -> boolean | 判断x是否为有限数值|
+|is_infinite(x) -> boolean |判断x是否为无穷数值|
+|is_nan(x) -> boolean | 判断x是否为NaN（not-a-number, 非数值）|
+|nan() -> double | 获取一个表示NAN（not-a-number）的常数 |
+|from_base(string, radix) -> bigint | 获取字面量的值，该值的基数为radix|
+|to_base(x, radix) -> varchar | 返回x以radix为基数的字面量|
+|cosine_similarity(x, y) -> double | 返回两个稀疏向量的余弦相似度|
+
+
 ## 用法
 
 将下面这些内容写入 `${HOME}/.hiverc` 文件, 或者也可以按需在hive命令行环境中执行.
@@ -166,6 +186,9 @@ create temporary function array_concat as 'cc.shanruifeng.functions.array.UDFArr
 create temporary function array_value_count as 'cc.shanruifeng.functions.array.UDFArrayValueCount';
 create temporary function array_slice as 'cc.shanruifeng.functions.array.UDFArraySlice';
 create temporary function array_element_at as 'cc.shanruifeng.functions.array.UDFArrayElementAt';
+create temporary function array_filter as 'cc.shanruifeng.functions.array.UDFArrayFilter';
+create temporary function array_shuffle as 'cc.shanruifeng.functions.array.UDFArrayShuffle';
+create temporary function sequence as 'cc.shanruifeng.functions.array.UDFSequence';
 create temporary function bit_count as 'cc.shanruifeng.functions.bitwise.UDFBitCount';
 create temporary function bitwise_and as 'cc.shanruifeng.functions.bitwise.UDFBitwiseAnd';
 create temporary function bitwise_not as 'cc.shanruifeng.functions.bitwise.UDFBitwiseNot';
@@ -205,6 +228,14 @@ create temporary function gcj_to_wgs as 'cc.shanruifeng.functions.geo.UDFGeoGcjT
 create temporary function gcj_extract_wgs as 'cc.shanruifeng.functions.geo.UDFGeoGcjExtractWgs';
 create temporary function url_encode as 'cc.shanruifeng.functions.url.UDFUrlEncode';
 create temporary function url_decode as 'cc.shanruifeng.functions.url.UDFUrlDecode';
+create temporary function infinity as 'cc.shanruifeng.functions.math.UDFMathInfinity';
+create temporary function is_finite as 'cc.shanruifeng.functions.math.UDFMathIsFinite';
+create temporary function is_infinite as 'cc.shanruifeng.functions.math.UDFMathIsInfinite';
+create temporary function is_nan as 'cc.shanruifeng.functions.math.UDFMathIsNaN';
+create temporary function nan as 'cc.shanruifeng.functions.math.UDFMathNaN';
+create temporary function from_base as 'cc.shanruifeng.functions.math.UDFMathFromBase';
+create temporary function to_base as 'cc.shanruifeng.functions.math.UDFMathToBase';
+create temporary function cosine_similarity as 'cc.shanruifeng.functions.math.UDFMathCosineSimilarity';
 ```
 
 你可以在hive的命令杭中使用下面的语句来查看函数的细节.
@@ -257,6 +288,15 @@ select array_concat(array(16,12,18,9,null), array(14,9,6,18,null)) => [16,12,18,
 select array_value_count(array(16,13,12,13,18,16,9,18), 13) => 2
 select array_slice(array(16,13,12,13,18,16,9,18), -2, 3) => [9,18]
 select array_element_at(array(16,13,12,13,18,16,9,18), -1) => 18
+select array_filter(array(16,13), 'x -> x > 15') => [16]
+select array_filter(array('a','b'), 'x -> x == \'a\'') => [a]
+select array_filter(array(true, false, NULL), 'x -> x != null && x') => [true]
+select array_filter(array(array('abc', null, '123'), array ('def', 'x', '456')), 'x -> x.get(1) == null') => [['abc', null, '123']]
+select array_shuffle(array(16,12,18,9))
+select sequence(1, 5) => [1, 2, 3, 4, 5]
+select sequence(5, 1) => [5, 4, 3, 2, 1]
+select sequence(1, 9, 4) => [1, 5, 9]
+select sequence('2016-04-12 00:00:00', '2016-04-14 00:00:00', 24*3600*1000) => ['2016-04-12 00:00:00', '2016-04-13 00:00:00', '2016-04-14 00:00:00']
 ```
 
 ```
@@ -302,3 +342,7 @@ select gcj_extract_wgs(39.915, 116.404) => {"lng":116.39775549316407,"lat":39.91
 ```
 select url_encode('http://shanruifeng.cc/') => http%3A%2F%2Fshanruifeng.cc%2F
 ```
+
+```
+select cosine_similarity(map_build(array('a'), array(1.0)), map_build(array('a'), array(2.0))) => 1.0
+```
diff --git a/README.md b/README.md
@@ -10,7 +10,8 @@
 Some useful custom hive udf functions, especial array and json functions.
 
 > Note:
-> hive-third-functions support hive-0.11.0 or higher.
+> 1. hive-third-functions supports hive-0.11.0 or higher.
+> 2. hive-third-functions `3.0.0` or higher requires Java 8 or higher.
 
 ## Build
 
@@ -40,7 +41,7 @@ It will generate hive-third-functions-${version}-shaded.jar in target directory.
 
 You can also directly download file from [release page](https://github.com/aaronshan/hive-third-functions/releases).
 
-> current latest version is `2.1.3`
+> current latest version is `3.0.0`
 
 ## Functions
 
@@ -71,6 +72,11 @@ You can also directly download file from [release page](https://github.com/aaron
 |array_value_count(array&lt;E&gt;, E) -> long | count array's element number that element value equals given value.|
 |array_slice(array, start, length) -> array | subsets array starting from index start (or starting from the end if start is negative) with a length of length.|
 |array_element_at(array&lt;E&gt;, index) -> E | returns element of array at given index. If index < 0, element_at accesses elements from the last to the first.|
+|array_filter(array&lt;E&gt;, function&lt;E, boolean&gt;) -> array&lt;E&gt; | constructs an array from those elements of array for which function returns true.|
+|array_shuffle(array) -> array | Generate a random permutation of the given array.|
+|sequence(start, end) -> array&lt;Long&gt; | Generate a sequence of integers from start to end.|
+|sequence(start, end, step) -> array&lt;Long&gt; | Generate a sequence of integers from start to end, incrementing by step.|
+|sequence(start_date_string, end_date_string, step) -> array&lt;String&gt; | Generate a sequence of date strings from start to end, incrementing by step (in milliseconds).|
 
 ### 3. map functions
 | function| description |
@@ -145,6 +151,19 @@ You can also directly download file from [release page](https://github.com/aaron
 |url_encode(value) -> string | escapes value by encoding it so that it can be safely included in URL query parameter names and values|
 |url_decode(value) -> string | unescape the URL encoded value. This function is the inverse of `url_encode`. | 
 
+### 10. math functions
+
+| function| description |
+|:--|:--|
+|infinity() -> double | Returns the constant representing positive infinity.|
+|is_finite(x) -> boolean | Determine if x is finite.|
+|is_infinite(x) -> boolean |Determine if x is infinite.|
+|is_nan(x) -> boolean | Determine if x is not-a-number.|
+|nan() -> double | Returns the constant representing not-a-number. |
+|from_base(string, radix) -> bigint | Returns the value of string interpreted as a base-radix number.|
+|to_base(x, radix) -> varchar | Returns the base-radix representation of x.|
+|cosine_similarity(x, y) -> double | Returns the cosine similarity between the sparse vectors x and y.|
+
 ## Use
 
 Put these statements into `${HOME}/.hiverc` or exec its on hive cli env.
@@ -166,6 +185,9 @@ create temporary function array_concat as 'cc.shanruifeng.functions.array.UDFArr
 create temporary function array_value_count as 'cc.shanruifeng.functions.array.UDFArrayValueCount';
 create temporary function array_slice as 'cc.shanruifeng.functions.array.UDFArraySlice';
 create temporary function array_element_at as 'cc.shanruifeng.functions.array.UDFArrayElementAt';
+create temporary function array_filter as 'cc.shanruifeng.functions.array.UDFArrayFilter';
+create temporary function array_shuffle as 'cc.shanruifeng.functions.array.UDFArrayShuffle';
+create temporary function sequence as 'cc.shanruifeng.functions.array.UDFSequence';
 create temporary function bit_count as 'cc.shanruifeng.functions.bitwise.UDFBitCount';
 create temporary function bitwise_and as 'cc.shanruifeng.functions.bitwise.UDFBitwiseAnd';
 create temporary function bitwise_not as 'cc.shanruifeng.functions.bitwise.UDFBitwiseNot';
@@ -204,7 +226,14 @@ create temporary function wgs_to_gcj as 'cc.shanruifeng.functions.geo.UDFGeoWgsT
 create temporary function gcj_to_wgs as 'cc.shanruifeng.functions.geo.UDFGeoGcjToWgs';
 create temporary function gcj_extract_wgs as 'cc.shanruifeng.functions.geo.UDFGeoGcjExtractWgs';
 create temporary function url_encode as 'cc.shanruifeng.functions.url.UDFUrlEncode';
-create temporary function url_decode as 'cc.shanruifeng.functions.url.UDFUrlDecode';
+create temporary function infinity as 'cc.shanruifeng.functions.math.UDFMathInfinity';
+create temporary function is_finite as 'cc.shanruifeng.functions.math.UDFMathIsFinite';
+create temporary function is_infinite as 'cc.shanruifeng.functions.math.UDFMathIsInfinite';
+create temporary function is_nan as 'cc.shanruifeng.functions.math.UDFMathIsNaN';
+create temporary function nan as 'cc.shanruifeng.functions.math.UDFMathNaN';
+create temporary function from_base as 'cc.shanruifeng.functions.math.UDFMathFromBase';
+create temporary function to_base as 'cc.shanruifeng.functions.math.UDFMathToBase';
+create temporary function cosine_similarity as 'cc.shanruifeng.functions.math.UDFMathCosineSimilarity';
 ```
 
 You can use these statements on hive cli env get detail of function.
@@ -257,6 +286,15 @@ select array_concat(array(16,12,18,9,null), array(14,9,6,18,null)) => [16,12,18,
 select array_value_count(array(16,13,12,13,18,16,9,18), 13) => 2
 select array_slice(array(16,13,12,13,18,16,9,18), -2, 3) => [9,18]
 select array_element_at(array(16,13,12,13,18,16,9,18), -1) => 18
+select array_filter(array(16,13), 'x -> x > 15') => [16]
+select array_filter(array('a','b'), 'x -> x == \'a\'') => [a]
+select array_filter(array(true, false, NULL), 'x -> x != null && x') => [true]
+select array_filter(array(array('abc', null, '123'), array ('def', 'x', '456')), 'x -> x.get(1) == null') => [['abc', null, '123']]
+select array_shuffle(array(16,12,18,9))
+select sequence(1, 5) => [1, 2, 3, 4, 5]
+select sequence(5, 1) => [5, 4, 3, 2, 1]
+select sequence(1, 9, 4) => [1, 5, 9]
+select sequence('2016-04-12 00:00:00', '2016-04-14 00:00:00', 24*3600*1000) => ['2016-04-12 00:00:00', '2016-04-13 00:00:00', '2016-04-14 00:00:00']
 ```
 
 ```
@@ -302,3 +340,7 @@ select gcj_extract_wgs(39.915, 116.404) => {"lng":116.39775549316407,"lat":39.91
 ```
 select url_encode('http://shanruifeng.cc/') => http%3A%2F%2Fshanruifeng.cc%2F
 ```
+
+```
+select cosine_similarity(map_build(array('a'), array(1.0)), map_build(array('a'), array(2.0))) => 1.0
+```
diff --git a/pom.xml b/pom.xml
@@ -6,7 +6,7 @@
 
     <groupId>cc.shanruifeng</groupId>
     <artifactId>hive-third-functions</artifactId>
-    <version>2.1.3</version>
+    <version>3.0.0</version>
 
     <properties>
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
@@ -24,6 +24,7 @@
         <dep.jackson.version>2.4.4</dep.jackson.version>
         <dep.jmh.version>1.9.3</dep.jmh.version>
         <junit.version>4.12</junit.version>
+        <lambda.from.string.version>1.6</lambda.from.string.version>
     </properties>
 
     <dependencyManagement>
@@ -70,6 +71,12 @@
                 <version>${dep.airlift.version}</version>
             </dependency>
 
+            <dependency>
+                <groupId>io.airlift</groupId>
+                <artifactId>slice</artifactId>
+                <version>0.35</version>
+            </dependency>
+
             <dependency>
                 <groupId>com.fasterxml.jackson.core</groupId>
                 <artifactId>jackson-core</artifactId>
@@ -87,6 +94,37 @@
                 <artifactId>junit</artifactId>
                 <version>${junit.version}</version>
             </dependency>
+
+            <dependency>
+                <groupId>pl.joegreen</groupId>
+                <artifactId>lambda-from-string</artifactId>
+                <version>${lambda.from.string.version}</version>
+            </dependency>
+
+            <dependency>
+                <groupId>org.apache.commons</groupId>
+                <artifactId>commons-math3</artifactId>
+                <version>3.6.1</version>
+            </dependency>
+
+            <dependency>
+                <groupId>com.teradata</groupId>
+                <artifactId>re2j-td</artifactId>
+                <version>1.4</version>
+            </dependency>
+
+            <dependency>
+                <groupId>org.apache.lucene</groupId>
+                <artifactId>lucene-analyzers-common</artifactId>
+                <version>7.2.1</version>
+                <exclusions>
+                    <exclusion>
+                        <groupId>org.apache.lucene</groupId>
+                        <artifactId>lucene-core</artifactId>
+                    </exclusion>
+                </exclusions>
+            </dependency>
+
         </dependencies>
     </dependencyManagement>
 
@@ -128,6 +166,11 @@
             <artifactId>json</artifactId>
         </dependency>
 
+        <dependency>
+            <groupId>io.airlift</groupId>
+            <artifactId>slice</artifactId>
+        </dependency>
+
         <dependency>
             <groupId>com.fasterxml.jackson.core</groupId>
             <artifactId>jackson-core</artifactId>
@@ -138,6 +181,26 @@
             <artifactId>jackson-databind</artifactId>
         </dependency>
 
+        <dependency>
+            <groupId>pl.joegreen</groupId>
+            <artifactId>lambda-from-string</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-math3</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>com.teradata</groupId>
+            <artifactId>re2j-td</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-analyzers-common</artifactId>
+        </dependency>
+
         <dependency>
             <groupId>junit</groupId>
             <artifactId>junit</artifactId>
@@ -176,8 +239,8 @@
                 <artifactId>maven-compiler-plugin</artifactId>
                 <version>3.1</version>
                 <configuration>
-                    <source>${project.build.targetJdk}</source>
-                    <target>${project.build.targetJdk}</target>
+                    <source>8</source>
+                    <target>8</target>
                     <encoding>${project.build.sourceEncoding}</encoding>
                     <showWarnings>true</showWarnings>
                 </configuration>