diff --git a/.github/README.md b/.github/README.md
new file mode 100644
index 0000000000000..a621d35e814b7
--- /dev/null
+++ b/.github/README.md
@@ -0,0 +1,10 @@
+## How to update the Pull Request Template
+
+When updating the pr template, you must consider if updates need to be made to scripts/pr_compliance.py
+
+## What are the files in workflows?
+- bot.yml: runs the hudi unit tests with various versions of scala, spark, and flink
+- pr_compliance.yml: checks pr titles and main comment to make sure that everything is filled out and formatted properly
+- update_pr_compliance: runs the pr_compliance tests when scripts/pr_compliance.py is updated
+
+
diff --git a/.github/workflows/pr_compliance.yml b/.github/workflows/pr_compliance.yml
new file mode 100644
index 0000000000000..67affbb7b749f
--- /dev/null
+++ b/.github/workflows/pr_compliance.yml
@@ -0,0 +1,21 @@
+name: validate pr
+on:
+  pull_request:
+    types: [opened, edited, reopened, synchronize]
+    branches:
+      - master
+
+jobs:
+  validate-pr:
+    runs-on: ubuntu-latest
+    env:
+      REQUEST_BODY: ${{ github.event.pull_request.body }}
+      REQUEST_TITLE: ${{ github.event.pull_request.title }}
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v3
+      - name: run script
+        run: python3 scripts/pr_compliance.py > test.log || { echo "::error::pr_compliance.py $(cat test.log)" && exit 1; }
+
+
+
diff --git a/.github/workflows/update_pr_compliance.yml b/.github/workflows/update_pr_compliance.yml
new file mode 100644
index 0000000000000..2989617f4cc01
--- /dev/null
+++ b/.github/workflows/update_pr_compliance.yml
@@ -0,0 +1,18 @@
+name: Update Pr Compliance
+
+on:
+  pull_request:
+    types: [opened, edited, reopened, synchronize]
+    branches:
+      - master
+    paths:
+      - scripts/pr_compliance.py
+  
+jobs:
+  run-tests:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v3
+      - name: run script
+        run: python3 scripts/pr_compliance.py run-tests > test.log || { echo "::error::pr_compliance.py $(cat test.log)" && exit 1; }
diff --git a/scripts/pr_compliance.py b/scripts/pr_compliance.py
new file mode 100644
index 0000000000000..ea1e2ce3e2241
--- /dev/null
+++ b/scripts/pr_compliance.py
@@ -0,0 +1,563 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import re
+import os
+import sys
+
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #  
+#  _____ _ _   _       __     __    _ _     _       _   _                 #
+# |_   _(_) |_| | ___  \ \   / /_ _| (_) __| | __ _| |_(_) ___  _ __      #  
+#   | | | | __| |/ _ \  \ \ / / _` | | |/ _` |/ _` | __| |/ _ \| '_ \     #
+#   | | | | |_| |  __/   \ V / (_| | | | (_| | (_| | |_| | (_) | | | |    #
+#   |_| |_|\__|_|\___|    \_/ \__,_|_|_|\__,_|\__,_|\__|_|\___/|_| |_|    #
+#                                                                         # 
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #                                                                 
+
+
+#validator for titles
+def validate_title(title: str):
+    return len(re.findall('(\[HUDI\-[0-9]{1,}\]|\[MINOR\])',title)) == 1
+
+#runs an individual title test
+#
+#   PARAMS
+# name: str - the name of the test
+# title: str - the title to test
+# isTrue: bool - is the title valid
+#
+#   RETURN
+# bool - True if the test passed, False if it failed
+def run_title_test(name: str, title: str, isTrue: bool):
+    if isTrue != validate_title(title):
+        print(f"{name} - FAILED")
+        return False
+    print(f"{name} - PASSED")
+    return True
+
+#tests for title validation
+#
+#   RETURN
+# bool - True if all tests passed, False if any tests fail
+def test_title():
+    test_return = True
+    #test that position doesn't matter for issue
+    test_return = run_title_test("issue at front", "[HUDI-1324] my fake pr", True) and test_return
+    test_return =  run_title_test("issue in middle", " my [HUDI-1324] fake pr", True) and test_return
+    test_return =  run_title_test("issue at end", " my fake pr [HUDI-1324]", True)  and test_return
+
+    #test position doesn't matter for minor
+    test_return = run_title_test("minor at front", "[MINOR] my fake pr", True) and test_return
+    test_return = run_title_test("minor in middle", " my [MINOR] fake pr", True) and test_return
+    test_return = run_title_test("minor at end", " my fake pr [MINOR]", True) and test_return
+
+    #test that more than 4 nums is also ok
+    test_return = run_title_test("more than 4 nums in issue", "[HUDI-12345] my fake pr", True) and test_return
+
+    #test that 1 nums is also ok
+    test_return = run_title_test("1 num in issue", "[HUDI-1] my fake pr", True) and test_return
+
+    #no nums not ok
+    test_return = run_title_test("no nums in issue", "[HUDI-] my fake pr", False) and test_return
+
+    #no brackets not ok
+    test_return = run_title_test("no brackets around issue", "HUDI-1234 my fake pr", False) and test_return
+    test_return = run_title_test("no brackets around minor", "MINOR my fake pr", False) and test_return
+    
+    #lowercase not ok
+    test_return = run_title_test("lowercase hudi", "[hudi-1234] my fake pr", False) and test_return
+    test_return = run_title_test("lowercase minor", "[minor] my fake pr", False) and test_return
+    
+    #duplicate not ok
+    test_return = run_title_test("duplicate issue", "[HUDI-1324][HUDI-1324] my fake pr", False) and test_return
+    test_return = run_title_test("duplicate minor", "[MINOR] my fake pr [MINOR]", False) and test_return
+    
+    #hudi and minor not ok
+    test_return = run_title_test("issue and minor", "[HUDI-1324] my [MINOR]fake pr", False) and test_return
+    print("*****")
+    if test_return:
+        print("All title tests passed")
+    else:
+        print("Some title tests failed")
+    print("*****")
+
+    return test_return
+
+
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+#  ____            _        __     __    _ _     _       _   _                #
+# | __ )  ___   __| |_   _  \ \   / /_ _| (_) __| | __ _| |_(_) ___  _ __     #
+# |  _ \ / _ \ / _` | | | |  \ \ / / _` | | |/ _` |/ _` | __| |/ _ \| '_ \    #
+# | |_) | (_) | (_| | |_| |   \ V / (_| | | | (_| | (_| | |_| | (_) | | | |   #
+# |____/ \___/ \__,_|\__, |    \_/ \__,_|_|_|\__,_|\__,_|\__|_|\___/|_| |_|   #
+#                    |___/                                                    #
+#                                                                             #
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #                                            
+    
+#Enums for the the outcome of parsing a single line
+class Outcomes:
+    #error was found so we should stop parsing and exit with error
+    ERROR = 0
+
+    #continue to parse the next line
+    CONTINUE = 1
+
+    #All requirements for the current section have been met so we should start 
+    #parsing the next section
+    NEXTSECTION = 2
+
+    #parsing has concluded succesfully, exit with no error
+    SUCCESS = 3
+
+
+#Holds the data for a section
+#   PARAMS
+# name: str - name of the parse section
+# identifier: str - line that signifies the start of a section
+# linesAfter: set of str - default lines in the template that we ignore when 
+#                          verifying that the user filled out the section
+# nextSection: str - The name of the next section or "SUCCESS" if this is the 
+#                    last section
+class ParseSectionData:
+    def __init__(self, name: str, identifier: str, linesAfter, nextSection: str):
+        self.name = name
+        self.identifier = identifier
+        self.linesAfter = linesAfter
+        self.nextSection = nextSection
+    
+    #returns true if line matches the identifier
+    def identify(self, line: str):
+        return line == self.identifier
+    
+    #returns true if user has added new text to the section
+    def identifyAfter(self, line: str):
+        return line not in self.linesAfter
+
+
+#Special holder of data for risk level because the identifier line is modified
+#by the user
+class RiskLevelData(ParseSectionData):
+    def __init__(self, name: str, identifier: str, linesAfter, nextSection: str):
+        super().__init__(name, identifier, linesAfter, nextSection)
+    
+    #we check that the line start with the identifier because the identifier 
+    #line will be modified when filled out correctly
+    def identify(self, line: str):
+        return line.startswith(self.identifier)
+
+#Holds all the parse section data in a map
+#   PARAMS
+# psd: list of ParseSectionData - a list of the data for all the parse sections
+class ParseSections:
+    def __init__(self, psd):
+        self.sections = {}
+        for x in psd:
+            self.sections[x.name] = x
+    
+    #returns true if line is an identifier for a section that is not value
+    #   PARAMS
+    # line: str - the line that we are parsing
+    # value: str - the name of the parse section that we are parsing
+    def validateOthers(self, line: str, value: str):
+        for name in self.sections:
+            if name != value:
+                if self.sections[name].identify(line):
+                    return True
+        return False    
+    
+    #returns the ParseSectionData that is named name
+    def get(self, name):
+        return self.sections.get(name)
+
+
+#Main class used to parse a section
+class ParseSection:
+    def __init__(self, data: ParseSectionData, sections: ParseSections, debug=False):
+        #set to true when the sections identifier is found
+        self.found = False
+
+        #true if debug messages should be printed
+        self.debug = debug
+
+        #the data for this parse section
+        self.data = data
+
+        #data of all the parse sections
+        self.sections = sections
+
+    #prints error message if debug is set to true
+    def error(self, message):
+        if self.debug:
+            print(f"ERROR:(state: {self.data.name}, found: {self.found}, message: {message}")
+
+    #returns the name of the next section
+    def nextSection(self):
+        return self.data.nextSection
+
+    #Returns true if we have already found the section identifier and line is 
+    #not in the default template
+    def validateAfter(self, line):
+        return self.found and self.data.identifyAfter(line)
+
+    #Decides what outcome occurs when the section identifier is found
+    def processIdentify(self, line):
+        if self.found:
+            #since we have already found the identifier, this means that we have
+            #found a duplicate of the identifier
+            self.error(f"duplicate line \"{line}\"")
+            return Outcomes.ERROR
+        self.found = True
+        return Outcomes.CONTINUE
+    
+    #Decides the outcome state by processing line
+    def validateLine(self,line):
+        if self.data.identify(line):
+            #we have found the identifier so we should decide what to do
+            return self.processIdentify(line)
+        elif self.sections.validateOthers(line, self.data.name):
+            #we have found the identifier for another section
+            self.error(f"Out of order section or missing description \"{line}\"")
+            return Outcomes.ERROR
+        elif self.validateAfter(line):
+            #the pr author has added new text to this section so we consider it
+            #to be filled out
+            if self.nextSection() == "SUCCESS":
+                #if next section is "SUCCESS" then there are no more sections 
+                #to process
+                return Outcomes.SUCCESS
+            return Outcomes.NEXTSECTION
+        return Outcomes.CONTINUE
+
+#We do not check this section for data
+#currently just used for the checklist where they just need to check boxes
+class NoDataSection(ParseSection):
+    def __init__(self, data: ParseSectionData, sections: ParseSections, debug=False):
+        super().__init__(data, sections, debug)
+
+    #After finding the identifier we don't need to look for anything else so we
+    #can just go to the next section or terminate if this is the last
+    def processIdentify(self, line):
+        o = super().processIdentify(line)
+        if o  == Outcomes.CONTINUE:
+            if self.nextSection() == "SUCCESS":
+                return Outcomes.SUCCESS
+            else:
+                return Outcomes.NEXTSECTION
+        return o
+
+#Risk level has different processing because the pr author will modify the 
+#identifier and doesn't need to add description if risk is none or low
+class RiskLevelSection(ParseSection):
+    def __init__(self, data: ParseSectionData, sections: ParseSections, debug=False):
+        super().__init__(data, sections, debug)
+
+    def processIdentify(self, line):
+        if self.found:
+            #since we have already found the identifier, this means that we have
+            #found a duplicate of the identifier
+            self.error(f"duplicate line starting with \"{self.identifier}\"")
+            return Outcomes.ERROR
+        if line == "**Risk level: none | low | medium | high**":
+            #the user has not modified this line so a risk level was not chosen
+            self.error("risk level not chosen")
+            return Outcomes.ERROR
+        if "NONE" in line.upper() or "LOW" in line.upper():
+            # an explanation is not required for none or low so we can just
+            # move on to the next section or terminate if this is the last
+            if self.nextSection() == "SUCCESS":
+                return Outcomes.SUCCESS
+            else:
+                return Outcomes.NEXTSECTION
+        elif "MEDIUM" in line.upper() or "HIGH" in line.upper():
+            # an explanation is required so we don't change state
+            self.found = True
+            return Outcomes.CONTINUE
+        else:
+            #the pr author put something weird in for risk level
+            self.error("invalid choice for risk level")
+            return Outcomes.ERROR
+
+#Class that orchestrates the validation of the entire body
+class ValidateBody:
+    def __init__(self, body: str, firstSection: str, sections: ParseSections, debug=False):
+        #the body of the pr post
+        self.body = body
+
+        #true if debug messages should be printed
+        self.debug = debug
+
+        #the name of the first section of the post
+        self.firstSection = firstSection
+
+        #the current section we are processing
+        self.section = None
+
+        #ParseSections which holds the data for all the sections
+        self.sections = sections
+
+    #Updates self.section to the next section
+    def nextSection(self):
+        #get the name of the section to parse
+        sectionName = self.firstSection
+        if self.section is not None:
+            sectionName = self.section.nextSection()
+
+        #get the data for that section
+        data = self.sections.get(sectionName)
+        if data is None:
+            print(f"parse section {sectionName} not found")
+            exit(-3)
+        
+        #create the section
+        if data.name == "RISKLEVEL":
+            self.section = RiskLevelSection(data=data, sections=self.sections, debug=self.debug)
+        elif data.name == "CHECKLIST":
+            self.section = NoDataSection(data=data, sections=self.sections, debug=self.debug)
+        else:
+            self.section = ParseSection(data=data, sections=self.sections, debug=self.debug)
+
+    #Returns true if the body complies with the validation rules else false
+    def validate(self):
+        #instantiate self.section since it starts null
+        self.nextSection()
+
+        #validate each line
+        for line in self.body.splitlines():
+            #ignore empty lines
+            if len(line) == 0:
+                continue
+
+            #run the parse section validation
+            o = self.section.validateLine(line)
+            
+            #decide what to do based on outcome
+            if o == Outcomes.ERROR:
+                return False
+            elif o == Outcomes.SUCCESS:
+                return True
+            elif o == Outcomes.NEXTSECTION:
+                self.nextSection()
+        #if we get through all the lines without a success outcome, then the 
+        #body does not comply
+        self.section.error("template is not filled out properly")
+        return False
+        
+#Generate the validator for the current template.
+#needs to be manually updated
+def make_default_validator(body, debug=False):
+    changelogs = ParseSectionData("CHANGELOGS",
+        "### Change Logs",
+        {"_Describe context and summary for this change. Highlight if any code was copied._"},
+        "IMPACT")
+    impact = ParseSectionData("IMPACT",
+        "### Impact",
+        {"_Describe any public API or user-facing feature change or any performance impact._"},
+        "RISKLEVEL")
+    risklevel = RiskLevelData("RISKLEVEL",
+        "**Risk level:",
+        {"_Choose one. If medium or high, explain what verification was done to mitigate the risks._"},
+        "CHECKLIST")
+    checklist = ParseSectionData("CHECKLIST",
+        "### Contributor's checklist",
+        {},
+        "SUCCESS")
+    parseSections = ParseSections([changelogs, impact, risklevel, checklist])
+
+    return ValidateBody(body, "CHANGELOGS", parseSections, debug)
+
+
+#takes a list of strings and returns a string of those lines separated by \n
+def joinLines(lines):
+    return "\n".join(lines)
+
+#runs a test for parsing the body
+#   PARAMS
+# name: str - the name of the test
+# body: str - the body to parse
+# isTrue: bool - True if the body complies with our validation rules
+# debug: bool - True if we want to print debug information
+def run_test(name: str, body: str, isTrue: bool, debug: bool):
+    validator = make_default_validator(body, debug)
+    if isTrue != validator.validate():
+        print(f"{name} - FAILED")
+        return False
+    print(f"{name} - PASSED")
+    return True
+
+# Given a list of sections which are lists of strings, it combines them into one
+# giant string that is the body to be parsed
+def build_body(sections):
+    res = ""
+    for s in sections:
+        res += joinLines(s) + "\n"
+    return res
+
+# Tests for validating the body of a pr. Returns true if all tests pass
+def test_body():
+    DEBUG_MESSAGES = False
+    #Create sections that we will combine to create bodies to test validation on
+    template_changelogs = [
+         "### Change Logs",
+        "",
+        "_Describe context and summary for this change. Highlight if any code was copied._",
+        ""
+    ]
+
+    good_changelogs = template_changelogs.copy()
+    good_changelogs[1] = "changelogs description"
+
+    template_impact = [
+        "### Impact",
+        "",
+        "_Describe any public API or user-facing feature change or any performance impact._",
+        ""
+    ]
+
+    good_impact = template_impact.copy()
+    good_impact[1] = "impact description"
+
+    template_risklevel = [
+        "**Risk level: none | low | medium | high**",
+        "",
+        "_Choose one. If medium or high, explain what verification was done to mitigate the risks._",
+        ""
+    ]
+
+    good_risklevel_none = template_risklevel.copy()
+    good_risklevel_none[0] = "**Risk level: none**"
+
+    good_risklevel_medium = template_risklevel.copy()
+    good_risklevel_medium[0] = "**Risk level: medium**"
+    good_risklevel_medium[1] = "risklevel description" 
+
+    template_checklist = [
+        "### Contributor's checklist",
+        "",
+        "- [ ] Read through [contributor's guide](https://hudi.apache.org/contribute/how-to-contribute)",
+        "- [ ] Change Logs and Impact were stated clearly",
+        "- [ ] Adequate tests were added if applicable",
+        "- [ ] CI passed"
+    ]
+
+    #list of sections that when combined form a valid body
+    good_sections = [good_changelogs, good_impact, good_risklevel_none, template_checklist]
+
+    #list of sections that when combined form the template
+    template_sections = [template_changelogs, template_impact, template_risklevel, template_checklist]
+
+    tests_passed = True
+    #Test section not filled out
+    for i in range(len(good_sections)-1):
+        test_sections = []
+        for j in range(len(good_sections)):
+            if j != i:
+                test_sections.append(good_sections[j].copy())
+            else:
+                test_sections.append(template_sections[j].copy())
+        tests_passed = run_test(f"template section not filled out: {i}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed
+
+    #Test duplicate section
+    for i in range(len(good_sections)-1):
+        test_sections = []
+        for j in range(len(good_sections)):
+            test_sections.append(good_sections[j].copy())
+            if j == i:
+                test_sections.append(good_sections[j].copy())
+        tests_passed = run_test(f"duplicate section: {i}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed
+    
+    #Test out of order section
+    for i in range(len(good_sections)):
+        test_sections = []
+        for j in range(len(good_sections)):
+            test_sections.append(good_sections[j].copy())
+        k = (i + 3) % len(good_sections)
+        test_sections[i], test_sections[k] = test_sections[k],test_sections[i]
+        tests_passed = run_test(f"Swapped sections: {i}, {k}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed
+
+    #Test missing section
+    for i in range(len(good_sections)):
+        test_sections = []
+        for j in range(len(good_sections)):
+            if i != j:
+                test_sections.append(good_sections[j].copy())
+        tests_passed = run_test(f"Missing Section: {i}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed
+    
+    #Test good body with risk level of none:
+    tests_passed = run_test("good documentation. risk level none", build_body(good_sections), True, DEBUG_MESSAGES) and tests_passed
+
+    #Test good body with risk level of medium:
+    risk_level_index = 2
+    good_medium_risk_level = good_sections.copy()
+    good_medium_risk_level[risk_level_index] = good_risklevel_medium
+    tests_passed = run_test("good documentation. risk level medium", build_body(good_medium_risk_level), True, DEBUG_MESSAGES) and tests_passed
+
+    #Test good body except medium risk level and there is no description
+    bad_medium_risk_level = good_sections.copy()
+    bad_risklevel_medium = good_risklevel_medium.copy()
+    bad_risklevel_medium[1] = ""
+    bad_medium_risk_level[risk_level_index] = bad_risklevel_medium
+    tests_passed = run_test("medium risk level but no description", build_body(bad_medium_risk_level), False, DEBUG_MESSAGES) and tests_passed
+
+    #Test unknown risk level:
+    unknow_risk_level = good_sections.copy()
+    unknow_risk_level_section = template_risklevel.copy()
+    unknow_risk_level_section[0] = "**Risk level: unknown**"
+    unknow_risk_level[risk_level_index] = unknow_risk_level_section
+    tests_passed = run_test("unknown risk level", build_body(unknow_risk_level), False, DEBUG_MESSAGES) and tests_passed
+
+    print("*****")
+    if tests_passed:
+        print("All body tests passed")
+    else:
+        print("Some body tests failed")
+    print("*****")
+    
+    return tests_passed
+
+
+
+
+
+if __name__ == '__main__':
+    if len(sys.argv) > 1:
+        title_tests = test_title()
+        body_tests = test_body()
+        if title_tests and body_tests:
+            exit(0)
+        else:
+            exit(-1)
+
+
+    title = os.getenv("REQUEST_TITLE")
+    body = os.getenv("REQUEST_BODY")
+
+    if title is None:
+        print("no title")
+        exit(-1)
+
+    if not validate_title(title):
+        print("invalid title")
+        exit(-1)
+
+    if body is None:
+        print("no pr body")
+        exit(-1)
+
+    validator = make_default_validator(body,True)
+    if not validator.validate():
+        exit(-1)
+    exit(0)