diff --git a/.github/workflows/alert-on-failed-automerge.yml b/.github/workflows/alert-on-failed-automerge.yml deleted file mode 100644 index 31582b50d1..0000000000 --- a/.github/workflows/alert-on-failed-automerge.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: Check Suite Failure Notification - -on: - # whenever a workflow suite completes trigger this - check_suite: - types: - - completed - -jobs: - notify_on_failure: - if: ${{ github.event.check_suite.conclusion == 'failure' }} - runs-on: ubuntu-24.04 - - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Check if the PR has "automerge" label - id: automerge_check - run: | - pr_number=$(jq -r '.check_suite.pull_requests[0].number' <<< "${{ toJson(github.event) }}") - if [ -z "$pr_number" ]; then - echo "No PR associated with this check suite." - echo "::set-output name=result::false" - exit 0 - fi - - labels=$(curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ - https://api.github.com/repos/${{ github.repository }}/issues/$pr_number/labels) - - echo "Labels: $labels" - if echo "$labels" | grep -q "automerge"; then - echo "::set-output name=result::true" - else - echo "::set-output name=result::false" - fi - - - name: Send Slack notification if the PR has "automerge" label - if: ${{ steps.automerge_check.outputs.result == 'true' }} - uses: rtCamp/action-slack-notify@v2 - env: - SLACK_CHANNEL: subscriptions-slack-testing - SLACK_COLOR: ${{ job.status }} # or a specific color like 'good' or '#ff00ff' - SLACK_ICON: https://github.com/posthog.png?size=48 - SLACK_USERNAME: Max Hedgehog - SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }} - with: - message: "PR #${{ github.event.check_suite.pull_requests[0].number }} failed a check suite and is labeled 'automerge'. Please investigate!" 
- status: failure diff --git a/.github/workflows/automerge.yml b/.github/workflows/automerge.yml deleted file mode 100644 index 4b2a41b913..0000000000 --- a/.github/workflows/automerge.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: Automerge - -env: - MERGE_METHOD: 'squash' - MERGE_RETRY_SLEEP: 300000 - -on: - pull_request: - types: - - labeled - - unlabeled - - synchronize - - opened - - edited - - ready_for_review - - reopened - - unlocked - check_suite: - types: - - completed - status: {} - -jobs: - automerge: - name: Automerge if requested - runs-on: ubuntu-24.04 - env: - IS_POSTHOG_BOT_AVAILABLE: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN != '' }} - steps: - - name: Automerge - if: env.IS_POSTHOG_BOT_AVAILABLE == 'true' - uses: pascalgn/automerge-action@v0.16.3 - env: - GITHUB_TOKEN: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - - run: echo diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml deleted file mode 100644 index 2ff87672fd..0000000000 --- a/.github/workflows/benchmark.yml +++ /dev/null @@ -1,172 +0,0 @@ -name: Benchmark - -on: - pull_request: - branches: ['*'] - paths: - - .github/workflows/benchmark.yml - schedule: - - cron: '0 4 * * 1-5' # Mon-Fri 4AM UTC - workflow_dispatch: {} - -concurrency: 'benchmarks' # Ensure only one of this runs at a time - -jobs: - run-benchmarks: - name: Clickhouse queries - runs-on: ubuntu-20.04 - environment: clickhouse-benchmarks - - # Benchmarks are expensive to run so we only run them (periodically) against master branch and for PRs labeled `performance` - if: ${{ github.repository == 'PostHog/posthog' && (github.ref == 'refs/heads/master' || contains(github.event.pull_request.labels.*.name, 'performance')) }} - - env: - DATABASE_URL: 'postgres://posthog:posthog@localhost:5432/posthog' - REDIS_URL: 'redis://localhost' - DEBUG: '1' - CLICKHOUSE_DATABASE: posthog - CLICKHOUSE_HOST: ${{ secrets.BENCHMARKS_CLICKHOUSE_HOST }} - CLICKHOUSE_USER: ${{ secrets.BENCHMARKS_CLICKHOUSE_USER }} - CLICKHOUSE_PASSWORD: ${{ secrets.BENCHMARKS_CLICKHOUSE_PASSWORD }} - CLICKHOUSE_SECURE: 'false' - CLICKHOUSE_VERIFY: 'false' - SECRET_KEY: '6b01eee4f945ca25045b5aab440b953461faf08693a9abbf1166dc7c6b9772da' # unsafe - for testing only - BENCHMARK: '1' - - steps: - - uses: actions/checkout@v3 - with: - # Checkout repo with full history - fetch-depth: 0 - - - name: Check out PostHog/benchmarks-results repo - uses: actions/checkout@v3 - with: - path: ee/benchmarks/results - repository: PostHog/benchmark-results - token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - - - name: Stop/Start stack with Docker Compose - run: | - docker compose -f docker-compose.dev.yml down - docker compose -f docker-compose.dev.yml up -d - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: 3.11.9 - cache: 'pip' - cache-dependency-path: '**/requirements*.txt' - token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - - # uv is a fast pip alternative: https://github.com/astral-sh/uv/ - - run: pip install uv - - - name: Install SAML (python3-saml) dependencies - shell: bash - run: | - sudo apt-get update - sudo apt-get install libxml2-dev libxmlsec1-dev libxmlsec1-openssl - - - name: Install python dependencies - run: | - uv pip install --system -r requirements-dev.txt - uv pip install --system -r requirements.txt - - - name: Install asv - run: uv pip install --system asv==0.5.1 virtualenv - - - name: Set up PostHog - run: | - python manage.py migrate & wait - python manage.py setup_dev --no-data - - - name: Configure benchmarks - run: asv machine --config 
ee/benchmarks/asv.conf.json --yes --machine ci-benchmarks - - - name: Run benchmarks - run: asv run --config ee/benchmarks/asv.conf.json --show-stderr --strict - - - name: Compare results - run: | - asv compare $(cat ee/benchmarks/results/last-master-commit) HEAD --config ee/benchmarks/asv.conf.json --factor 1.2 | tee pr_vs_master.txt - asv compare $(cat ee/benchmarks/results/last-master-commit) HEAD --config ee/benchmarks/asv.conf.json --factor 1.2 --only-changed | tee pr_vs_master_changed.txt - - - name: Save last benchmarked commit - if: ${{ github.ref == 'refs/heads/master' }} - run: echo "${{ github.sha }}" | tee ee/benchmarks/results/last-master-commit - - - name: Generate HTML report of results - if: ${{ github.ref == 'refs/heads/master' }} - run: asv publish --config ee/benchmarks/asv.conf.json - - - name: Commit update for benchmark results - if: ${{ github.repository == 'PostHog/posthog' && github.ref == 'refs/heads/master' }} - uses: stefanzweifel/git-auto-commit-action@v5 - with: - repository: ee/benchmarks/results - branch: master - commit_message: 'Save benchmark results' - commit_user_name: PostHog Bot - commit_user_email: hey@posthog.com - commit_author: PostHog Bot - - - name: Upload results as artifacts - uses: actions/upload-artifact@v4 - with: - name: benchmarks - path: | - pr_vs_master.txt - pr_vs_master_changed.txt - - - name: Read benchmark output - if: ${{ github.event_name == 'pull_request' }} - id: pr_vs_master_changed - uses: juliangruber/read-file-action@v1 - with: - path: pr_vs_master_changed.txt - - - name: Read benchmark output (full) - if: ${{ github.event_name == 'pull_request' }} - id: pr_vs_master - uses: juliangruber/read-file-action@v1 - with: - path: pr_vs_master.txt - - - name: Find Comment - if: ${{ github.event_name == 'pull_request' }} - uses: peter-evans/find-comment@v2 - id: fc - with: - issue-number: ${{ github.event.number }} - comment-author: 'github-actions[bot]' - body-includes: ClickHouse query benchmark results from GitHub Actions - - - name: Create or update comment - if: ${{ github.event_name == 'pull_request' }} - uses: peter-evans/create-or-update-comment@v3 - with: - comment-id: ${{ steps.fc.outputs.comment-id }} - issue-number: ${{ github.event.number }} - body: | - ClickHouse query benchmark results from GitHub Actions - - Lower numbers are good, higher numbers are bad. A ratio less than 1 - means a speed up and greater than 1 means a slowdown. Green lines - beginning with `+` are slowdowns (the PR is slower then master or - master is slower than the previous release). Red lines beginning - with `-` are speedups. Blank means no changes. - - Significantly changed benchmark results (PR vs master) - ```diff - ${{ steps.pr_vs_master_changed.outputs.content }} - ``` - -
- Click to view full benchmark results - - ```diff - ${{ steps.pr_vs_master.outputs.content }} - ``` -
- edit-mode: replace diff --git a/.github/workflows/browserslist-update.yml b/.github/workflows/browserslist-update.yml deleted file mode 100644 index ff27f8d674..0000000000 --- a/.github/workflows/browserslist-update.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: Update Browserslist database - -on: - schedule: - - cron: '0 12 * * MON' - workflow_dispatch: - -permissions: - contents: write - pull-requests: write - -jobs: - update-browserslist-database: - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: Configure git - run: | - git config --global user.email "action@github.com" - git config --global user.name "Browserslist Update Action" - - - name: Install pnpm - uses: pnpm/action-setup@v4 - - - name: Set up Node.js - uses: actions/setup-node@v4 - with: - node-version: 18.12.1 - - - name: Update Browserslist database and create PR if applies - uses: c2corg/browserslist-update-action@v2 - with: - github_token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} # This token has permission to open PRs - commit_message: 'build: update Browserslist db' - title: 'build: update Browserslist db' - labels: 'dependencies, automerge' diff --git a/.github/workflows/build-and-deploy-prod.yml b/.github/workflows/build-and-deploy-prod.yml deleted file mode 100644 index 9d5c7004ab..0000000000 --- a/.github/workflows/build-and-deploy-prod.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: Notify Sentry - -# -# Comment the `on:` section below if you want to stop deploys -# -on: - push: - branches: - - master - paths-ignore: - - 'rust/**' - - 'livestream/**' - -jobs: - sentry: - name: Notify Sentry of a production release - runs-on: ubuntu-20.04 - if: github.repository == 'PostHog/posthog' - steps: - - name: Checkout master - uses: actions/checkout@v4 - - name: Notify Sentry - uses: getsentry/action-release@v1 - env: - SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }} - SENTRY_ORG: posthog - SENTRY_PROJECT: posthog - with: - environment: production diff --git a/.github/workflows/build-hogql-parser.yml b/.github/workflows/build-hogql-parser.yml deleted file mode 100644 index 8feffe960e..0000000000 --- a/.github/workflows/build-hogql-parser.yml +++ /dev/null @@ -1,140 +0,0 @@ -name: Release hogql-parser - -on: - pull_request: - paths: - - hogql_parser/** - - .github/workflows/build-hogql-parser.yml - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - check-version: - name: Check version legitimacy - runs-on: ubuntu-22.04 - outputs: - parser-release-needed: ${{ steps.version.outputs.parser-release-needed }} - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 # Fetching all for comparison since last push (not just last commit) - - - name: Check if hogql_parser/ has changed - id: changed-files - uses: tj-actions/changed-files@v43 - with: - since_last_remote_commit: true - files_yaml: | - parser: - - hogql_parser/** - - - name: Check if version was bumped - shell: bash - id: version - run: | - parser_release_needed='false' - if [[ ${{ steps.changed-files.outputs.parser_any_changed }} == 'true' ]]; then - published=$(curl -fSsl https://pypi.org/pypi/hogql-parser/json | jq -r '.info.version') - local=$(python hogql_parser/setup.py --version) - if [[ "$published" != "$local" ]]; then - parser_release_needed='true' - else - message_body="It looks like the code of \`hogql-parser\` has changed since last push, but its version stayed the same at $local. 
👀\nMake sure to resolve this in \`hogql_parser/setup.py\` before merging!${{ github.event.pull_request.head.repo.full_name != 'PostHog/posthog' && '\nThis needs to be performed on a branch created on the PostHog/posthog repo itself. A PostHog team member will assist with this.' || ''}}" - curl -s -u posthog-bot:${{ secrets.POSTHOG_BOT_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} -X POST -d "{ \"body\": \"$message_body\" }" "https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/comments" - fi - fi - echo "parser-release-needed=$parser_release_needed" >> $GITHUB_OUTPUT - - build-wheels: - name: Build wheels on ${{ matrix.os }} - needs: check-version - runs-on: ${{ matrix.os }} - timeout-minutes: 30 - if: needs.check-version.outputs.parser-release-needed == 'true' && - github.event.pull_request.head.repo.full_name == 'PostHog/posthog' - strategy: - matrix: - # As of October 2023, GitHub doesn't have ARM Actions runners… and ARM emulation is insanely slow - # (20x longer) on the Linux runners (while being reasonable on the macOS runners). Hence, we use - # BuildJet as a provider of ARM runners - this solution saves a lot of time and consequently some money. - os: [ubuntu-22.04, buildjet-2vcpu-ubuntu-2204-arm, macos-12] - - steps: - - uses: actions/checkout@v4 - - - if: ${{ !endsWith(matrix.os, '-arm') }} - uses: actions/setup-python@v4 - with: - python-version: '3.11' - - # Compiling Python 3.11 from source on ARM. We tried using the "deadsnakes" ARM repo, but it was flakey. - - if: ${{ endsWith(matrix.os, '-arm') }} - name: Install Python 3.11 on ARM (compile from source) - run: | - sudo apt-get update - sudo apt-get install -y build-essential libssl-dev zlib1g-dev \ - libncurses5-dev libncursesw5-dev libreadline-dev libsqlite3-dev \ - libgdbm-dev libdb5.3-dev libbz2-dev libexpat1-dev liblzma-dev tk-dev - wget https://www.python.org/ftp/python/3.11.0/Python-3.11.0.tar.xz - tar -xf Python-3.11.0.tar.xz - cd Python-3.11.0 - ./configure --enable-optimizations - make -j 2 - sudo make altinstall - - - name: Build sdist - if: matrix.os == 'ubuntu-22.04' # Only build the sdist once - run: cd hogql_parser && python setup.py sdist - - - name: Install cibuildwheel - run: pip install cibuildwheel==2.16.* - - - name: Build wheels - run: cd hogql_parser && python -m cibuildwheel --output-dir dist - env: - MACOSX_DEPLOYMENT_TARGET: '12' # A modern target allows us to use C++20 - - - uses: actions/upload-artifact@v4 - with: - path: | - hogql_parser/dist/*.whl - hogql_parser/dist/*.tar.gz - if-no-files-found: error - - publish: - name: Publish on PyPI - needs: build-wheels - environment: pypi-hogql-parser - permissions: - id-token: write - runs-on: ubuntu-22.04 - steps: - - name: Fetch wheels - uses: actions/download-artifact@v4 - with: - name: artifact - path: dist/ - - - name: Publish package to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - - - uses: actions/checkout@v4 - with: - token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - ref: ${{ github.event.pull_request.head.ref }} - - - name: Update hogql-parser in requirements - shell: bash - run: | - local=$(python hogql_parser/setup.py --version) - sed -i "s/hogql-parser==.*/hogql-parser==${local}/g" requirements.in - sed -i "s/hogql-parser==.*/hogql-parser==${local}/g" requirements.txt - - - uses: EndBug/add-and-commit@v9 - with: - add: '["requirements.in", "requirements.txt"]' - message: 'Use new hogql-parser version' - default_author: github_actions - github_token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }}
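For reference, the `check-version` job in the deleted `build-hogql-parser.yml` above decided whether a new `hogql-parser` release was needed by comparing the version published on PyPI with the version declared in `hogql_parser/setup.py`. The snippet below is a minimal standalone sketch of that comparison, not an exact reproduction of the workflow step; it assumes `curl`, `jq`, and a Python interpreter are on PATH and that it is run from the repository root.

```bash
#!/usr/bin/env bash
# Sketch: compare the hogql-parser version published on PyPI with the local
# version declared in hogql_parser/setup.py (mirrors the deleted check-version step).
set -euo pipefail

# Version currently published on PyPI (same JSON endpoint the workflow queried).
published=$(curl -fsSL https://pypi.org/pypi/hogql-parser/json | jq -r '.info.version')

# Version declared in the local working copy.
local_version=$(python hogql_parser/setup.py --version)

if [[ "$published" != "$local_version" ]]; then
    echo "parser-release-needed=true (local $local_version vs published $published)"
else
    echo "parser-release-needed=false (version $local_version is already on PyPI)"
fi
```

In the workflow, a mismatch set `parser-release-needed=true`, which gated the `build-wheels` and `publish` jobs; matching versions despite changed parser sources instead triggered the reminder comment on the PR.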
diff --git a/.github/workflows/ci-backend-update-test-timing.yml b/.github/workflows/ci-backend-update-test-timing.yml deleted file mode 100644 index eb1c36329c..0000000000 --- a/.github/workflows/ci-backend-update-test-timing.yml +++ /dev/null @@ -1,49 +0,0 @@ -name: Backend CI - Update test timing - -on: - workflow_dispatch: - -env: - SECRET_KEY: '6b01eee4f945ca25045b5aab440b953461faf08693a9abbf1166dc7c6b9772da' # unsafe - for testing only - DATABASE_URL: 'postgres://posthog:posthog@localhost:5432/posthog' - REDIS_URL: 'redis://localhost' - CLICKHOUSE_HOST: 'localhost' - CLICKHOUSE_SECURE: 'False' - CLICKHOUSE_VERIFY: 'False' - TEST: 1 - OBJECT_STORAGE_ENABLED: 'True' - OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000' - OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user' - OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password' - -jobs: - django: - name: Run Django tests and save test durations - runs-on: ubuntu-24.04 - steps: - - uses: actions/checkout@v3 - - - uses: ./.github/actions/run-backend-tests - with: - concurrency: 1 - group: 1 - token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - python-version: '3.11.9' - clickhouse-server-image: 'clickhouse/clickhouse-server:24.8.7.41' - segment: 'FOSS' - person-on-events: false - - - name: Upload updated timing data as artifacts - uses: actions/upload-artifact@v4 - if: ${{ inputs.person-on-events != 'true' && inputs.clickhouse-server-image == 'clickhouse/clickhouse-server:24.8.7.41' }} - with: - name: timing_data-${{ inputs.segment }}-${{ inputs.group }} - path: .test_durations - retention-days: 2 - # - name: Save test durations - # uses: stefanzweifel/git-auto-commit-action@v5 - # with: - # commit_message: 'Save backend test durations' - # commit_user_name: PostHog Bot - # commit_user_email: hey@posthog.com - # commit_author: PostHog Bot diff --git a/.github/workflows/ci-backend.yml b/.github/workflows/ci-backend.yml deleted file mode 100644 index 22882cafb7..0000000000 --- a/.github/workflows/ci-backend.yml +++ /dev/null @@ -1,419 +0,0 @@ -# This workflow runs all of our backend django tests. -# -# If these tests get too slow, look at increasing concurrency and re-timing the tests by manually dispatching -# .github/workflows/ci-backend-update-test-timing.yml action -name: Backend CI - -on: - push: - branches: - - master - workflow_dispatch: - inputs: - clickhouseServerVersion: - description: ClickHouse server version. 
Leave blank for default - type: string - pull_request: - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - # This is so that the workflow run isn't canceled when a snapshot update is pushed within it by posthog-bot - # We do however cancel from container-images-ci.yml if a commit is pushed by someone OTHER than posthog-bot - cancel-in-progress: false - -env: - SECRET_KEY: '6b01eee4f945ca25045b5aab440b953461faf08693a9abbf1166dc7c6b9772da' # unsafe - for testing only - DATABASE_URL: 'postgres://posthog:posthog@localhost:5432/posthog' - REDIS_URL: 'redis://localhost' - CLICKHOUSE_HOST: 'localhost' - CLICKHOUSE_SECURE: 'False' - CLICKHOUSE_VERIFY: 'False' - TEST: 1 - CLICKHOUSE_SERVER_IMAGE_VERSION: ${{ github.event.inputs.clickhouseServerVersion || '' }} - OBJECT_STORAGE_ENABLED: 'True' - OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000' - OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user' - OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password' - # tests would intermittently fail in GH actions - # with exit code 134 _after passing_ all tests - # this appears to fix it - # absolute wild tbh https://stackoverflow.com/a/75503402 - DISPLAY: ':99.0' -jobs: - # Job to decide if we should run backend ci - # See https://github.com/dorny/paths-filter#conditional-execution for more details - changes: - runs-on: ubuntu-24.04 - timeout-minutes: 5 - name: Determine need to run backend checks - # Set job outputs to values from filter step - outputs: - backend: ${{ steps.filter.outputs.backend }} - steps: - # For pull requests it's not necessary to checkout the code, but we - # also want this to run on master so we need to checkout - - uses: actions/checkout@v3 - - - uses: dorny/paths-filter@v2 - id: filter - with: - filters: | - backend: - # Avoid running backend tests for irrelevant changes - # NOTE: we are at risk of missing a dependency here. We could make - # the dependencies more clear if we separated the backend/frontend - # code completely - # really we should ignore ee/frontend/** but dorny doesn't support that - # - '!ee/frontend/**' - # including the negated rule appears to work - # but makes it always match because the checked file always isn't `ee/frontend/**` 🙈 - - 'ee/**/*' - - 'hogvm/**/*' - - 'posthog/**/*' - - 'bin/*.py' - - requirements.txt - - requirements-dev.txt - - mypy.ini - - pytest.ini - - frontend/src/queries/schema.json # Used for generating schema.py - - plugin-transpiler/src # Used for transpiling plugins - # Make sure we run if someone is explicitly change the workflow - - .github/workflows/ci-backend.yml - - .github/actions/run-backend-tests/action.yml - # We use docker compose for tests, make sure we rerun on - # changes to docker-compose.dev.yml e.g.
dependency - # version changes - - docker-compose.dev.yml - - frontend/public/email/* - # These scripts are used in the CI - - bin/check_temporal_up - - bin/check_kafka_clickhouse_up - - backend-code-quality: - needs: changes - timeout-minutes: 30 - - name: Python code quality checks - runs-on: ubuntu-24.04 - - steps: - # If this run wasn't initiated by the bot (meaning: snapshot update) and we've determined - # there are backend changes, cancel previous runs - - uses: n1hility/cancel-previous-runs@v3 - if: github.actor != 'posthog-bot' && needs.changes.outputs.backend == 'true' - with: - token: ${{ secrets.GITHUB_TOKEN }} - - - uses: actions/checkout@v3 - with: - fetch-depth: 1 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: 3.11.9 - cache: 'pip' - cache-dependency-path: '**/requirements*.txt' - token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - - # uv is a fast pip alternative: https://github.com/astral-sh/uv/ - - run: pip install uv - - - name: Install SAML (python3-saml) dependencies - run: | - sudo apt-get update - sudo apt-get install libxml2-dev libxmlsec1 libxmlsec1-dev libxmlsec1-openssl - - - name: Install Python dependencies - run: | - uv pip install --system -r requirements.txt -r requirements-dev.txt - - - name: Check for syntax errors, import sort, and code style violations - run: | - ruff check . - - - name: Check formatting - run: | - ruff format --check --diff . - - - name: Add Problem Matcher - run: echo "::add-matcher::.github/mypy-problem-matcher.json" - - - name: Check static typing - run: | - mypy -p posthog | mypy-baseline filter - - - name: Check if "schema.py" is up to date - run: | - npm run schema:build:python && git diff --exit-code - - check-migrations: - needs: changes - if: needs.changes.outputs.backend == 'true' - timeout-minutes: 10 - - name: Validate Django and CH migrations - runs-on: ubuntu-24.04 - - steps: - - uses: actions/checkout@v3 - - - name: Stop/Start stack with Docker Compose - run: | - docker compose -f docker-compose.dev.yml down - docker compose -f docker-compose.dev.yml up -d - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: 3.11.9 - cache: 'pip' - cache-dependency-path: '**/requirements*.txt' - token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - - # uv is a fast pip alternative: https://github.com/astral-sh/uv/ - - run: pip install uv - - - name: Install SAML (python3-saml) dependencies - run: | - sudo apt-get update - sudo apt-get install libxml2-dev libxmlsec1-dev libxmlsec1-openssl - - # First running migrations from master, to simulate the real-world scenario - - name: Checkout master - uses: actions/checkout@v3 - with: - ref: master - - - name: Install python dependencies for master - run: | - uv pip install --system -r requirements.txt -r requirements-dev.txt - - - name: Run migrations up to master - run: | - python manage.py migrate - - # Now we can consider this PR's migrations - - name: Checkout this PR - uses: actions/checkout@v3 - - - name: Install python dependencies for this PR - run: | - uv pip install --system -r requirements.txt -r requirements-dev.txt - - - name: Run migrations for this PR - run: | - python manage.py migrate - - - name: Check migrations - run: | - python manage.py makemigrations --check --dry-run - git fetch origin master - # `git diff --name-only` returns a list of files that were changed - added OR deleted OR modified - # With `--name-status` we get the same, but including a column for status, respectively: A, D, M - # In this check we 
exclusively care about files that were - # added (A) in posthog/migrations/. We also want to ignore - # initial migrations (0001_*) as these are guaranteed to be - # run on initial setup where there is no data. - git diff --name-status origin/master..HEAD | grep "A\sposthog/migrations/" | awk '{print $2}' | grep -v migrations/0001_ | python manage.py test_migrations_are_safe - - - name: Check CH migrations - run: | - # Same as above, except now for CH looking at files that were added in posthog/clickhouse/migrations/ - git diff --name-status origin/master..HEAD | grep "A\sposthog/clickhouse/migrations/" | awk '{print $2}' | python manage.py test_ch_migrations_are_safe - - django: - needs: changes - # increase for tmate testing - timeout-minutes: 30 - - name: Django tests – ${{ matrix.segment }} (persons-on-events ${{ matrix.person-on-events && 'on' || 'off' }}), Py ${{ matrix.python-version }}, ${{ matrix.clickhouse-server-image }} (${{matrix.group}}/${{ matrix.concurrency }}) - runs-on: ubuntu-24.04 - - strategy: - fail-fast: false - matrix: - python-version: ['3.11.9'] - clickhouse-server-image: ['clickhouse/clickhouse-server:24.8.7.41'] - segment: ['Core'] - person-on-events: [false, true] - # :NOTE: Keep concurrency and groups in sync - concurrency: [10] - group: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - include: - - segment: 'Temporal' - person-on-events: false - clickhouse-server-image: 'clickhouse/clickhouse-server:24.8.7.41' - python-version: '3.11.9' - concurrency: 3 - group: 1 - - segment: 'Temporal' - person-on-events: false - clickhouse-server-image: 'clickhouse/clickhouse-server:24.8.7.41' - python-version: '3.11.9' - concurrency: 3 - group: 2 - - segment: 'Temporal' - person-on-events: false - clickhouse-server-image: 'clickhouse/clickhouse-server:24.8.7.41' - python-version: '3.11.9' - concurrency: 3 - group: 3 - - steps: - # The first step is the only one that should run if `needs.changes.outputs.backend == 'false'`. - # All the other ones should rely on `needs.changes.outputs.backend` directly or indirectly, so that they're - # effectively skipped if backend code is unchanged. See https://github.com/PostHog/posthog/pull/15174.
- - uses: actions/checkout@v3 - with: - fetch-depth: 1 - repository: ${{ github.event.pull_request.head.repo.full_name }} - ref: ${{ github.event.pull_request.head.ref }} - # Use PostHog Bot token when not on forks to enable proper snapshot updating - token: ${{ github.event.pull_request.head.repo.full_name == github.repository && secrets.POSTHOG_BOT_GITHUB_TOKEN || github.token }} - - - uses: ./.github/actions/run-backend-tests - if: needs.changes.outputs.backend == 'true' - with: - segment: ${{ matrix.segment }} - person-on-events: ${{ matrix.person-on-events }} - python-version: ${{ matrix.python-version }} - clickhouse-server-image: ${{ matrix.clickhouse-server-image }} - concurrency: ${{ matrix.concurrency }} - group: ${{ matrix.group }} - token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - - - uses: EndBug/add-and-commit@v9 - # Skip on forks - # Also skip for persons-on-events runs, as we want to ignore snapshots diverging there - if: ${{ github.event.pull_request.head.repo.full_name == 'PostHog/posthog' && needs.changes.outputs.backend == 'true' && !matrix.person-on-events }} - with: - add: '["ee", "./**/*.ambr", "posthog/queries/", "posthog/migrations", "posthog/tasks", "posthog/hogql/"]' - message: 'Update query snapshots' - pull: --rebase --autostash # Make sure we're up-to-date with other segments' updates - default_author: github_actions - github_token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - - - name: Check if any snapshot changes were left uncomitted - id: changed-files - if: ${{ github.event.pull_request.head.repo.full_name == 'PostHog/posthog' && needs.changes.outputs.backend == 'true' && !matrix.person-on-events }} - run: | - if [[ -z $(git status -s | grep -v ".test_durations" | tr -d "\n") ]] - then - echo 'files_found=false' >> $GITHUB_OUTPUT - else - echo 'diff=$(git status --porcelain)' >> $GITHUB_OUTPUT - echo 'files_found=true' >> $GITHUB_OUTPUT - fi - - - name: Fail CI if some snapshots have been updated but not committed - if: steps.changed-files.outputs.files_found == 'true' && steps.add-and-commit.outcome == 'success' - run: | - echo "${{ steps.changed-files.outputs.diff }}" - exit 1 - - - name: Archive email renders - uses: actions/upload-artifact@v4 - if: needs.changes.outputs.backend == 'true' && matrix.segment == 'Core' && matrix.person-on-events == false - with: - name: email_renders - path: posthog/tasks/test/__emails__ - retention-days: 5 - - async-migrations: - name: Async migrations tests - ${{ matrix.clickhouse-server-image }} - needs: changes - strategy: - fail-fast: false - matrix: - clickhouse-server-image: ['clickhouse/clickhouse-server:24.8.7.41'] - if: needs.changes.outputs.backend == 'true' - runs-on: ubuntu-24.04 - steps: - - name: 'Checkout repo' - uses: actions/checkout@v3 - with: - fetch-depth: 1 - - - name: Start stack with Docker Compose - run: | - export CLICKHOUSE_SERVER_IMAGE_VERSION=${{ matrix.clickhouse-server-image }} - docker compose -f docker-compose.dev.yml down - docker compose -f docker-compose.dev.yml up -d - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: 3.11.9 - cache: 'pip' - cache-dependency-path: '**/requirements*.txt' - token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - - # uv is a fast pip alternative: https://github.com/astral-sh/uv/ - - run: pip install uv - - - name: Install SAML (python3-saml) dependencies - run: | - sudo apt-get update - sudo apt-get install libxml2-dev libxmlsec1-dev libxmlsec1-openssl - - - name: Install python dependencies - shell: bash - run: | - uv pip install 
--system -r requirements.txt -r requirements-dev.txt - - - name: Add kafka host to /etc/hosts for kafka connectivity - run: sudo echo "127.0.0.1 kafka" | sudo tee -a /etc/hosts - - - name: Set up needed files - run: | - mkdir -p frontend/dist - touch frontend/dist/index.html - touch frontend/dist/layout.html - touch frontend/dist/exporter.html - - - name: Wait for Clickhouse & Kafka - run: bin/check_kafka_clickhouse_up - - - name: Run async migrations tests - run: | - pytest -m "async_migrations" - - calculate-running-time: - name: Calculate running time - needs: [django, async-migrations] - runs-on: ubuntu-24.04 - if: # Run on pull requests to PostHog/posthog + on PostHog/posthog outside of PRs - but never on forks - needs.changes.outputs.backend == 'true' && - ( - github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name || github.repository - ) == 'PostHog/posthog' - steps: - - name: Calculate running time - run: | - echo "${{ secrets.GITHUB_TOKEN }}" | gh auth login --with-token - run_id=${GITHUB_RUN_ID} - repo=${GITHUB_REPOSITORY} - run_info=$(gh api repos/${repo}/actions/runs/${run_id}) - echo run_info: ${run_info} - # name is the name of the workflow file - # run_started_at is the start time of the workflow - # we want to get the number of seconds between the start time and now - name=$(echo ${run_info} | jq -r '.name') - run_url=$(echo ${run_info} | jq -r '.url') - run_started_at=$(echo ${run_info} | jq -r '.run_started_at') - run_attempt=$(echo ${run_info} | jq -r '.run_attempt') - start_seconds=$(date -d "${run_started_at}" +%s) - now_seconds=$(date +%s) - duration=$((now_seconds-start_seconds)) - echo running_time_duration_seconds=${duration} >> $GITHUB_ENV - echo running_time_run_url=${run_url} >> $GITHUB_ENV - echo running_time_run_attempt=${run_attempt} >> $GITHUB_ENV - echo running_time_run_id=${run_id} >> $GITHUB_ENV - echo running_time_run_started_at=${run_started_at} >> $GITHUB_ENV - - name: Capture running time to PostHog - uses: PostHog/posthog-github-action@v0.1 - with: - posthog-token: ${{secrets.POSTHOG_API_TOKEN}} - event: 'posthog-ci-running-time' - properties: '{"duration_seconds": ${{ env.running_time_duration_seconds }}, "run_url": "${{ env.running_time_run_url }}", "run_attempt": "${{ env.running_time_run_attempt }}", "run_id": "${{ env.running_time_run_id }}", "run_started_at": "${{ env.running_time_run_started_at }}"}' diff --git a/.github/workflows/ci-e2e.yml b/.github/workflows/ci-e2e.yml deleted file mode 100644 index 8717352036..0000000000 --- a/.github/workflows/ci-e2e.yml +++ /dev/null @@ -1,315 +0,0 @@ -# -# This workflow runs CI E2E tests with Cypress. -# -# It relies on the container image built by 'container-images-ci.yml'. -# -name: E2E CI - -on: - pull_request: - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - changes: - runs-on: ubuntu-24.04 - timeout-minutes: 5 - name: Determine need to run E2E checks - # Set job outputs to values from filter step - outputs: - shouldTriggerCypress: ${{ steps.changes.outputs.shouldTriggerCypress }} - steps: - # For pull requests it's not necessary to check out the code - - uses: dorny/paths-filter@v2 - id: changes - with: - filters: | - shouldTriggerCypress: - # Avoid running E2E tests for irrelevant changes - # NOTE: we are at risk of missing a dependency here. 
We could make - # the dependencies more clear if we separated the backend/frontend - # code completely - - 'ee/**' - - 'posthog/**' - - 'bin/*' - - frontend/**/* - - requirements.txt - - requirements-dev.txt - - package.json - - pnpm-lock.yaml - # Make sure we run if someone is explicitly change the workflow - - .github/workflows/ci-e2e.yml - - .github/actions/build-n-cache-image/action.yml - # We use docker compose for tests, make sure we rerun on - # changes to docker-compose.dev.yml e.g. dependency - # version changes - - docker-compose.dev.yml - - Dockerfile - - cypress/** - - # Job that lists and chunks spec file names and caches node modules - chunks: - needs: changes - name: Cypress preparation - runs-on: ubuntu-24.04 - timeout-minutes: 5 - outputs: - chunks: ${{ steps.chunk.outputs.chunks }} - steps: - - name: Check out - uses: actions/checkout@v3 - - - name: Group spec files into chunks of three - id: chunk - run: echo "chunks=$(ls cypress/e2e/* | jq --slurp --raw-input -c 'split("\n")[:-1] | _nwise(2) | join("\n")' | jq --slurp -c .)" >> $GITHUB_OUTPUT - - container: - name: Build and cache container image - runs-on: ubuntu-24.04 - timeout-minutes: 60 - needs: [changes] - permissions: - contents: read - id-token: write # allow issuing OIDC tokens for this workflow run - outputs: - tag: ${{ steps.build.outputs.tag }} - build-id: ${{ steps.build.outputs.build-id }} - steps: - - name: Checkout - if: needs.changes.outputs.shouldTriggerCypress == 'true' - uses: actions/checkout@v3 - - name: Build the Docker image with Depot - if: needs.changes.outputs.shouldTriggerCypress == 'true' - # Build the container image in preparation for the E2E tests - uses: ./.github/actions/build-n-cache-image - id: build - with: - save: true - actions-id-token-request-url: ${{ env.ACTIONS_ID_TOKEN_REQUEST_URL }} - - cypress: - name: Cypress E2E tests (${{ strategy.job-index }}) - runs-on: ubuntu-24.04 - timeout-minutes: 60 - needs: [chunks, changes, container] - permissions: - id-token: write # allow issuing OIDC tokens for this workflow run - - strategy: - # when one test fails, DO NOT cancel the other - # containers, as there may be other spec failures - # we want to know about. 
- fail-fast: false - matrix: - chunk: ${{ fromJson(needs.chunks.outputs.chunks) }} - - steps: - - name: Checkout - if: needs.changes.outputs.shouldTriggerCypress == 'true' - uses: actions/checkout@v3 - - - name: Install pnpm - if: needs.changes.outputs.shouldTriggerCypress == 'true' - uses: pnpm/action-setup@v4 - - - name: Set up Node.js - if: needs.changes.outputs.shouldTriggerCypress == 'true' - uses: actions/setup-node@v4 - with: - node-version: 18.12.1 - - - name: Get pnpm cache directory path - if: needs.changes.outputs.shouldTriggerCypress == 'true' - id: pnpm-cache-dir - run: echo "PNPM_STORE_PATH=$(pnpm store path)" >> $GITHUB_OUTPUT - - - name: Get cypress cache directory path - if: needs.changes.outputs.shouldTriggerCypress == 'true' - id: cypress-cache-dir - run: echo "CYPRESS_BIN_PATH=$(npx cypress cache path)" >> $GITHUB_OUTPUT - - - uses: actions/cache@v4 - if: needs.changes.outputs.shouldTriggerCypress == 'true' - id: pnpm-cache - with: - path: | - ${{ steps.pnpm-cache-dir.outputs.PNPM_STORE_PATH }} - ${{ steps.cypress-cache-dir.outputs.CYPRESS_BIN_PATH }} - key: ${{ runner.os }}-pnpm-cypress-${{ hashFiles('**/pnpm-lock.yaml') }} - restore-keys: | - ${{ runner.os }}-pnpm-cypress- - - - name: Install package.json dependencies with pnpm - if: needs.changes.outputs.shouldTriggerCypress == 'true' - run: pnpm install --frozen-lockfile - - - name: Stop/Start stack with Docker Compose - # these are required checks so, we can't skip entire sections - if: needs.changes.outputs.shouldTriggerCypress == 'true' - run: | - docker compose -f docker-compose.dev.yml down - docker compose -f docker-compose.dev.yml up -d - - - name: Wait for ClickHouse - # these are required checks so, we can't skip entire sections - if: needs.changes.outputs.shouldTriggerCypress == 'true' - run: ./bin/check_kafka_clickhouse_up - - - name: Install Depot CLI - if: needs.changes.outputs.shouldTriggerCypress == 'true' - uses: depot/setup-action@v1 - - - name: Get Docker image cached in Depot - if: needs.changes.outputs.shouldTriggerCypress == 'true' - uses: depot/pull-action@v1 - with: - build-id: ${{ needs.container.outputs.build-id }} - tags: ${{ needs.container.outputs.tag }} - - - name: Write .env # This step intentionally has no if, so that GH always considers the action as having run - run: | - cat <<EOT >> .env - SECRET_KEY=6b01eee4f945ca25045b5aab440b953461faf08693a9abbf1166dc7c6b9772da - REDIS_URL=redis://localhost - DATABASE_URL=postgres://posthog:posthog@localhost:5432/posthog - KAFKA_HOSTS=kafka:9092 - DISABLE_SECURE_SSL_REDIRECT=1 - SECURE_COOKIES=0 - OPT_OUT_CAPTURE=0 - E2E_TESTING=1 - SKIP_SERVICE_VERSION_REQUIREMENTS=1 - EMAIL_HOST=email.test.posthog.net - SITE_URL=http://localhost:8000 - NO_RESTART_LOOP=1 - CLICKHOUSE_SECURE=0 - OBJECT_STORAGE_ENABLED=1 - OBJECT_STORAGE_ENDPOINT=http://localhost:19000 - OBJECT_STORAGE_ACCESS_KEY_ID=object_storage_root_user - OBJECT_STORAGE_SECRET_ACCESS_KEY=object_storage_root_password - GITHUB_ACTION_RUN_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" - CELERY_METRICS_PORT=8999 - CLOUD_DEPLOYMENT=E2E - ENCRYPTION_SALT_KEYS=00beef0000beef0000beef0000beef00 - EOT - - - name: Start PostHog - # these are required checks so, we can't skip entire sections - if: needs.changes.outputs.shouldTriggerCypress == 'true' - run: | - mkdir -p /tmp/logs - - echo "Starting PostHog using the container image ${{ needs.container.outputs.tag }}" - DOCKER_RUN="docker run --rm --network host --add-host kafka:127.0.0.1 --env-file .env ${{ 
needs.container.outputs.tag }}" - - $DOCKER_RUN ./bin/migrate - $DOCKER_RUN python manage.py setup_dev - - # only starts the plugin server so that the "wait for PostHog" step passes - $DOCKER_RUN ./bin/docker-worker &> /tmp/logs/worker.txt & - $DOCKER_RUN ./bin/docker-server &> /tmp/logs/server.txt & - - - name: Wait for PostHog - # these are required checks so, we can't skip entire sections - if: needs.changes.outputs.shouldTriggerCypress == 'true' - # this action might be abandoned - but v1 doesn't point to latest of v1 (which it should) - # so pointing to v1.1.0 to remove warnings about node version with v1 - # todo check https://github.com/iFaxity/wait-on-action/releases for new releases - uses: iFaxity/wait-on-action@v1.2.1 - timeout-minutes: 3 - with: - verbose: true - log: true - resource: http://localhost:8000 - - - name: Cypress run - # these are required checks so, we can't skip entire sections - if: needs.changes.outputs.shouldTriggerCypress == 'true' - uses: cypress-io/github-action@v6 - with: - config-file: cypress.e2e.config.ts - spec: ${{ matrix.chunk }} - install: false - # We were seeing suprising crashes in headless mode - # See https://github.com/cypress-io/cypress/issues/28893#issuecomment-1956480875 - headed: true - env: - E2E_TESTING: 1 - OPT_OUT_CAPTURE: 0 - GITHUB_ACTION_RUN_URL: '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}' - - - name: Archive test screenshots - uses: actions/upload-artifact@v4 - with: - name: screenshots - path: cypress/screenshots - if: ${{ failure() }} - - - name: Archive test downloads - uses: actions/upload-artifact@v4 - with: - name: downloads - path: cypress/downloads - if: ${{ failure() }} - - - name: Archive test videos - uses: actions/upload-artifact@v4 - with: - name: videos - path: cypress/videos - if: ${{ failure() }} - - - name: Archive accessibility violations - if: needs.changes.outputs.shouldTriggerCypress == 'true' - uses: actions/upload-artifact@v4 - with: - name: accessibility-violations - path: '**/a11y/' - if-no-files-found: 'ignore' - - - name: Show logs on failure - # use artefact here, as I think the output will be too large for display in an action - uses: actions/upload-artifact@v4 - with: - name: logs-${{ strategy.job-index }} - path: /tmp/logs - if: ${{ failure() }} - - calculate-running-time: - name: Calculate running time - runs-on: ubuntu-24.04 - needs: [cypress] - if: needs.changes.outputs.shouldTriggerCypress == 'true' && - github.event.pull_request.head.repo.full_name == 'PostHog/posthog' - steps: - - name: Calculate running time - run: | - echo "${{ secrets.GITHUB_TOKEN }}" | gh auth login --with-token - run_id=${GITHUB_RUN_ID} - repo=${GITHUB_REPOSITORY} - run_info=$(gh api repos/${repo}/actions/runs/${run_id}) - echo run_info: ${run_info} - # name is the name of the workflow file - # run_started_at is the start time of the workflow - # we want to get the number of seconds between the start time and now - name=$(echo ${run_info} | jq -r '.name') - run_url=$(echo ${run_info} | jq -r '.url') - run_started_at=$(echo ${run_info} | jq -r '.run_started_at') - run_attempt=$(echo ${run_info} | jq -r '.run_attempt') - start_seconds=$(date -d "${run_started_at}" +%s) - now_seconds=$(date +%s) - duration=$((now_seconds-start_seconds)) - echo running_time_duration_seconds=${duration} >> $GITHUB_ENV - echo running_time_run_url=${run_url} >> $GITHUB_ENV - echo running_time_run_attempt=${run_attempt} >> $GITHUB_ENV - echo running_time_run_id=${run_id} >> $GITHUB_ENV - echo 
running_time_run_started_at=${run_started_at} >> $GITHUB_ENV - - - name: Capture running time to PostHog - if: github.event.pull_request.head.repo.full_name == 'PostHog/posthog' - uses: PostHog/posthog-github-action@v0.1 - with: - posthog-token: ${{secrets.POSTHOG_API_TOKEN}} - event: 'posthog-ci-running-time' - properties: '{"duration_seconds": ${{ env.running_time_duration_seconds }}, "run_url": "${{ env.running_time_run_url }}", "run_attempt": "${{ env.running_time_run_attempt }}", "run_id": "${{ env.running_time_run_id }}", "run_started_at": "${{ env.running_time_run_started_at }}"}' diff --git a/.github/workflows/ci-frontend.yml b/.github/workflows/ci-frontend.yml deleted file mode 100644 index f59c7e8eef..0000000000 --- a/.github/workflows/ci-frontend.yml +++ /dev/null @@ -1,163 +0,0 @@ -name: Frontend CI - -on: - pull_request: - push: - branches: - - master - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - # Job to decide if we should run frontend ci - # See https://github.com/dorny/paths-filter#conditional-execution for more details - # we skip each step individually, so they are still reported as success - # because many of them are required for CI checks to be green - changes: - runs-on: ubuntu-24.04 - timeout-minutes: 5 - name: Determine need to run frontend checks - outputs: - frontend: ${{ steps.filter.outputs.frontend }} - steps: - # For pull requests it's not necessary to check out the code, but we - # also want this to run on master, so we need to check out - - uses: actions/checkout@v3 - - - uses: dorny/paths-filter@v2 - id: filter - with: - filters: | - frontend: - # Avoid running frontend tests for irrelevant changes - # NOTE: we are at risk of missing a dependency here. 
- - 'bin/**' - - 'frontend/**' - - 'ee/frontend/**' - # Make sure we run if someone is explicitly change the workflow - - .github/workflows/ci-frontend.yml - # various JS config files - - .eslintrc.js - - .prettier* - - babel.config.js - - package.json - - pnpm-lock.yaml - - jest.*.ts - - tsconfig.json - - tsconfig.*.json - - webpack.config.js - - stylelint* - - frontend-code-quality: - name: Code quality checks - needs: changes - # kea typegen and typescript:check need some more oomph - runs-on: ubuntu-24.04 - steps: - # we need at least one thing to run to make sure we include everything for required jobs - - uses: actions/checkout@v3 - - - name: Install pnpm - if: needs.changes.outputs.frontend == 'true' - uses: pnpm/action-setup@v4 - - - name: Set up Node.js - if: needs.changes.outputs.frontend == 'true' - uses: actions/setup-node@v4 - with: - node-version: 18.12.1 - - - name: Get pnpm cache directory path - if: needs.changes.outputs.frontend == 'true' - id: pnpm-cache-dir - run: echo "PNPM_STORE_PATH=$(pnpm store path)" >> $GITHUB_OUTPUT - - - uses: actions/cache@v4 - if: needs.changes.outputs.frontend == 'true' - id: pnpm-cache - with: - path: ${{ steps.pnpm-cache-dir.outputs.PNPM_STORE_PATH }} - key: ${{ runner.os }}-pnpm-cypress-${{ hashFiles('pnpm-lock.yaml') }} - restore-keys: ${{ runner.os }}-pnpm-cypress- - - - name: Install package.json dependencies with pnpm - if: needs.changes.outputs.frontend == 'true' - run: pnpm install --frozen-lockfile - - - name: Check formatting with prettier - if: needs.changes.outputs.frontend == 'true' - run: pnpm prettier:check - - - name: Lint with Stylelint - if: needs.changes.outputs.frontend == 'true' - run: pnpm lint:css - - - name: Generate logic types and run typescript with strict - if: needs.changes.outputs.frontend == 'true' - run: pnpm typegen:write && pnpm typescript:check - - - name: Lint with ESLint - if: needs.changes.outputs.frontend == 'true' - run: pnpm lint:js - - - name: Check if "schema.json" is up to date - if: needs.changes.outputs.frontend == 'true' - run: pnpm schema:build:json && git diff --exit-code - - - name: Check if mobile replay "schema.json" is up to date - if: needs.changes.outputs.frontend == 'true' - run: pnpm mobile-replay:schema:build:json && git diff --exit-code - - - name: Check toolbar bundle size - if: needs.changes.outputs.frontend == 'true' - uses: preactjs/compressed-size-action@v2 - with: - build-script: 'build' - compression: 'none' - pattern: 'frontend/dist/toolbar.js' - # we only care if the toolbar will increase a lot - minimum-change-threshold: 1000 - - jest: - runs-on: ubuntu-24.04 - needs: changes - name: Jest test (${{ matrix.segment }} - ${{ matrix.chunk }}) - - strategy: - # If one test fails, still run the others - fail-fast: false - matrix: - segment: ['FOSS', 'EE'] - chunk: [1, 2, 3] - - steps: - # we need at least one thing to run to make sure we include everything for required jobs - - uses: actions/checkout@v3 - - - name: Remove ee - if: needs.changes.outputs.frontend == 'true' && matrix.segment == 'FOSS' - run: rm -rf ee - - - name: Install pnpm - if: needs.changes.outputs.frontend == 'true' - uses: pnpm/action-setup@v4 - - - name: Set up Node.js - if: needs.changes.outputs.frontend == 'true' - uses: actions/setup-node@v4 - with: - node-version: 18.12.1 - cache: pnpm - - - name: Install package.json dependencies with pnpm - if: needs.changes.outputs.frontend == 'true' - run: pnpm install --frozen-lockfile - - - name: Test with Jest - # set maxWorkers or Jest only uses 1 CPU in GitHub 
Actions - run: pnpm test:unit --maxWorkers=2 --shard=${{ matrix.chunk }}/3 - if: needs.changes.outputs.frontend == 'true' - env: - NODE_OPTIONS: --max-old-space-size=6144 diff --git a/.github/workflows/ci-hobby.yml b/.github/workflows/ci-hobby.yml deleted file mode 100644 index 73d29cbdad..0000000000 --- a/.github/workflows/ci-hobby.yml +++ /dev/null @@ -1,50 +0,0 @@ -# This workflow runs e2e smoke test for hobby deployment -# To check on the status of the instance if this fails go to DO open the instance -# Instance name should look like `do-ci-hobby-deploy-xxxx` -# SSH onto the instance and `tail -f /var/log/cloud-init-output.log` -name: e2e - hobby smoke test -on: - push: - branches: - - 'release-*.*' - pull_request: - paths: - - docker-compose.base.yml - - docker-compose.hobby.yml - - bin/* - - docker/* - - .github/workflows/ci-hobby.yml - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - changes: - runs-on: ubuntu-24.04 - # this is a slow one - timeout-minutes: 30 - name: Setup DO Hobby Instance and test - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: '3.8' - cache: 'pip' # caching pip dependencies - cache-dependency-path: '**/requirements*.txt' - token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - - name: Get python deps - run: pip install python-digitalocean==1.17.0 requests==2.28.1 - - name: Setup DO Hobby Instance - run: python3 bin/hobby-ci.py create - env: - DIGITALOCEAN_TOKEN: ${{ secrets.DIGITALOCEAN_TOKEN }} - - name: Run smoke tests on DO - run: python3 bin/hobby-ci.py test $GITHUB_HEAD_REF - env: - DIGITALOCEAN_TOKEN: ${{ secrets.DIGITALOCEAN_TOKEN }} - - name: Post-cleanup step - if: always() - run: python3 bin/hobby-ci.py destroy - env: - DIGITALOCEAN_TOKEN: ${{ secrets.DIGITALOCEAN_TOKEN }} diff --git a/.github/workflows/ci-hog.yml b/.github/workflows/ci-hog.yml deleted file mode 100644 index ea51f70721..0000000000 --- a/.github/workflows/ci-hog.yml +++ /dev/null @@ -1,289 +0,0 @@ -# This workflow runs all of our backend django tests. 
-# -# If these tests get too slow, look at increasing concurrency and re-timing the tests by manually dispatching -# .github/workflows/ci-backend-update-test-timing.yml action -name: Hog CI - -on: - push: - branches: - - master - paths-ignore: - - rust/** - - livestream/** - pull_request: - paths-ignore: - - rust/** - - livestream/** - -jobs: - # Job to decide if we should run backend ci - # See https://github.com/dorny/paths-filter#conditional-execution for more details - changes: - runs-on: ubuntu-24.04 - timeout-minutes: 5 - name: Determine need to run Hog checks - # Set job outputs to values from filter step - outputs: - hog: ${{ steps.filter.outputs.hog }} - steps: - # For pull requests it's not necessary to checkout the code, but we - # also want this to run on master so we need to checkout - - uses: actions/checkout@v3 - - - uses: dorny/paths-filter@v2 - id: filter - with: - filters: | - hog: - # Avoid running tests for irrelevant changes - - 'hogvm/**/*' - - 'posthog/hogql/**/*' - - 'bin/hog' - - 'bin/hoge' - - requirements.txt - - requirements-dev.txt - - .github/workflows/ci-hog.yml - - hog-tests: - needs: changes - timeout-minutes: 30 - name: Hog tests - runs-on: ubuntu-24.04 - if: needs.changes.outputs.hog == 'true' - - steps: - # If this run wasn't initiated by the bot (meaning: snapshot update) and we've determined - # there are backend changes, cancel previous runs - - uses: n1hility/cancel-previous-runs@v3 - if: github.actor != 'posthog-bot' - with: - token: ${{ secrets.GITHUB_TOKEN }} - - - uses: actions/checkout@v3 - with: - fetch-depth: 1 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: 3.11.9 - cache: 'pip' - cache-dependency-path: '**/requirements*.txt' - token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - - # uv is a fast pip alternative: https://github.com/astral-sh/uv/ - - run: pip install uv - - - name: Install SAML (python3-saml) dependencies - run: | - sudo apt-get update - sudo apt-get install libxml2-dev libxmlsec1 libxmlsec1-dev libxmlsec1-openssl - - - name: Install Python dependencies - run: | - uv pip install --system -r requirements.txt -r requirements-dev.txt - - - name: Install pnpm - uses: pnpm/action-setup@v4 - - - name: Set up Node.js - uses: actions/setup-node@v4 - with: - node-version: 18 - - - name: Check if ANTLR definitions are up to date - run: | - cd .. - sudo apt-get install default-jre - mkdir antlr - cd antlr - curl -o antlr.jar https://www.antlr.org/download/antlr-$ANTLR_VERSION-complete.jar - export PWD=`pwd` - echo '#!/bin/bash' > antlr - echo "java -jar $PWD/antlr.jar \$*" >> antlr - chmod +x antlr - export CLASSPATH=".:$PWD/antlr.jar:$CLASSPATH" - export PATH="$PWD:$PATH" - - cd ../posthog - antlr | grep "Version" - npm run grammar:build && git diff --exit-code - env: - # Installing a version of ANTLR compatible with what's in Homebrew as of August 2024 (version 4.13.2), - # as apt-get is quite out of date. 
The same version must be set in hogql_parser/pyproject.toml - ANTLR_VERSION: '4.13.2' - - - name: Check if STL bytecode is up to date - run: | - python -m hogvm.stl.compile - git diff --exit-code - - - name: Run HogVM Python tests - run: | - pytest hogvm - - - name: Run HogVM TypeScript tests - run: | - cd hogvm/typescript - pnpm install --frozen-lockfile - pnpm run test - - - name: Run Hog tests - run: | - cd hogvm/typescript - pnpm run build - cd ../ - ./test.sh && git diff --exit-code - - check-package-version: - name: Check HogVM TypeScript package version and detect an update - needs: hog-tests - if: needs.hog-tests.result == 'success' && needs.changes.outputs.hog == 'true' - runs-on: ubuntu-24.04 - outputs: - committed-version: ${{ steps.check-package-version.outputs.committed-version }} - published-version: ${{ steps.check-package-version.outputs.published-version }} - is-new-version: ${{ steps.check-package-version.outputs.is-new-version }} - steps: - - name: Checkout the repository - uses: actions/checkout@v2 - - name: Check package version and detect an update - id: check-package-version - uses: PostHog/check-package-version@v2 - with: - path: hogvm/typescript - - release-hogvm: - name: Release new HogVM TypeScript version - runs-on: ubuntu-24.04 - needs: check-package-version - if: needs.changes.outputs.hog == 'true' && needs.check-package-version.outputs.is-new-version == 'true' - env: - COMMITTED_VERSION: ${{ needs.check-package-version.outputs.committed-version }} - PUBLISHED_VERSION: ${{ needs.check-package-version.outputs.published-version }} - steps: - - name: Checkout the repository - uses: actions/checkout@v4 - with: - fetch-depth: 1 - token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: 3.11.9 - cache: 'pip' - cache-dependency-path: '**/requirements*.txt' - token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - - run: pip install uv - - name: Install SAML (python3-saml) dependencies - run: | - sudo apt-get update - sudo apt-get install libxml2-dev libxmlsec1 libxmlsec1-dev libxmlsec1-openssl - - name: Install Python dependencies - run: | - uv pip install --system -r requirements.txt -r requirements-dev.txt - - name: Install pnpm - uses: pnpm/action-setup@v4 - - name: Set up Node 18 - uses: actions/setup-node@v4 - with: - node-version: 18 - registry-url: https://registry.npmjs.org - - name: Install package.json dependencies - run: cd hogvm/typescript && pnpm install - - name: Publish the package in the npm registry - run: cd hogvm/typescript && npm publish --access public - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - - name: Sleep 60 seconds to allow npm to update the package - run: sleep 60 - - update-versions: - name: Update versions in package.json - runs-on: ubuntu-24.04 - needs: release-hogvm - if: always() # This ensures the job runs regardless of the result of release-hogvm - steps: - - name: Checkout the repository - uses: actions/checkout@v4 - with: - fetch-depth: 1 - repository: ${{ github.event.pull_request.head.repo.full_name }} - ref: ${{ github.event.pull_request.head.ref }} - token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - - - name: Install pnpm - uses: pnpm/action-setup@v4 - - name: Set up Node 18 - uses: actions/setup-node@v4 - with: - node-version: 18 - registry-url: https://registry.npmjs.org - - - name: Check for version mismatches - id: check-mismatch - run: | - # Extract committed version - HOGVM_VERSION=$(jq -r '.version' hogvm/typescript/package.json) - - # Compare 
dependencies in package.json - MAIN_VERSION=$(jq -r '.dependencies."@posthog/hogvm"' package.json | tr -d '^') - PLUGIN_VERSION=$(jq -r '.dependencies."@posthog/hogvm"' plugin-server/package.json | tr -d '^') - - echo "HOGVM_VERSION=$HOGVM_VERSION" - echo "MAIN_VERSION=$MAIN_VERSION" - echo "PLUGIN_VERSION=$PLUGIN_VERSION" - - # Set output if mismatches exist - if [[ "$HOGVM_VERSION" != "$MAIN_VERSION" || "$HOGVM_VERSION" != "$PLUGIN_VERSION" ]]; then - echo "mismatch=true" >> "$GITHUB_ENV" - else - echo "mismatch=false" >> "$GITHUB_ENV" - fi - - - name: Update package.json versions - if: env.mismatch == 'true' - run: | - VERSION=$(jq ".version" hogvm/typescript/package.json -r) - - retry_pnpm_install() { - local retries=0 - local max_retries=20 # 10 minutes total - local delay=30 - - while [[ $retries -lt $max_retries ]]; do - echo "Attempting pnpm install (retry $((retries+1))/$max_retries)..." - pnpm install --no-frozen-lockfile && break - echo "Install failed. Retrying in $delay seconds..." - sleep $delay - retries=$((retries + 1)) - done - - if [[ $retries -eq $max_retries ]]; then - echo "pnpm install failed after $max_retries attempts." - exit 1 - fi - } - - # Update main package.json - mv package.json package.old.json - jq --indent 4 '.dependencies."@posthog/hogvm" = "^'$VERSION'"' package.old.json > package.json - rm package.old.json - retry_pnpm_install - - # Update plugin-server/package.json - cd plugin-server - mv package.json package.old.json - jq --indent 4 '.dependencies."@posthog/hogvm" = "^'$VERSION'"' package.old.json > package.json - rm package.old.json - retry_pnpm_install - - - name: Commit updated package.json files - if: env.mismatch == 'true' - uses: EndBug/add-and-commit@v9 - with: - add: '["package.json", "pnpm-lock.yaml", "plugin-server/package.json", "plugin-server/pnpm-lock.yaml", "hogvm/typescript/package.json"]' - message: 'Update @posthog/hogvm version in package.json' - default_author: github_actions - github_token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} diff --git a/.github/workflows/ci-plugin-server.yml b/.github/workflows/ci-plugin-server.yml deleted file mode 100644 index 1c8ba97095..0000000000 --- a/.github/workflows/ci-plugin-server.yml +++ /dev/null @@ -1,293 +0,0 @@ -name: Plugin Server CI - -on: - pull_request: - push: - branches: - - master - -env: - OBJECT_STORAGE_ENABLED: true - OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000' - OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user' - OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password' - OBJECT_STORAGE_SESSION_RECORDING_FOLDER: 'session_recordings' - OBJECT_STORAGE_BUCKET: 'posthog' - # set the max buffer size small enough that the functional tests behave the same in CI as when running locally - SESSION_RECORDING_MAX_BUFFER_SIZE_KB: 1024 - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - # Job to decide if we should run plugin server ci - # See https://github.com/dorny/paths-filter#conditional-execution for more details - changes: - runs-on: ubuntu-24.04 - timeout-minutes: 5 - name: Determine need to run plugin server checks - outputs: - plugin-server: ${{ steps.filter.outputs.plugin-server }} - steps: - # For pull requests it's not necessary to checkout the code, but we - # also want this to run on master so we need to checkout - - uses: actions/checkout@v3 - - - uses: dorny/paths-filter@v2 - id: filter - with: - filters: | - plugin-server: - - .github/workflows/ci-plugin-server.yml - - 
'plugin-server/**' - - 'posthog/clickhouse/migrations/**' - - 'ee/migrations/**' - - 'posthog/management/commands/setup_test_environment.py' - - 'posthog/migrations/**' - - 'posthog/plugins/**' - - 'docker*.yml' - - '*Dockerfile' - - code-quality: - name: Code quality - needs: changes - if: needs.changes.outputs.plugin-server == 'true' - runs-on: ubuntu-24.04 - defaults: - run: - working-directory: 'plugin-server' - steps: - - uses: actions/checkout@v3 - - - name: Install pnpm - uses: pnpm/action-setup@v4 - - - name: Set up Node.js - uses: actions/setup-node@v4 - with: - node-version: 18.12.1 - cache: pnpm - - - name: Install package.json dependencies with pnpm - run: pnpm install --frozen-lockfile - - - name: Check formatting with prettier - run: pnpm prettier:check - - - name: Lint with ESLint - run: pnpm lint - - tests: - name: Plugin Server Tests (${{matrix.shard}}) - needs: changes - runs-on: ubuntu-24.04 - - strategy: - fail-fast: false - matrix: - shard: [1/3, 2/3, 3/3] - - env: - REDIS_URL: 'redis://localhost' - CLICKHOUSE_HOST: 'localhost' - CLICKHOUSE_DATABASE: 'posthog_test' - KAFKA_HOSTS: 'kafka:9092' - - steps: - - name: Code check out - # NOTE: We need this check on every step so that it still runs if skipped as we need it to suceed for the CI - if: needs.changes.outputs.plugin-server == 'true' - uses: actions/checkout@v3 - - - name: Stop/Start stack with Docker Compose - if: needs.changes.outputs.plugin-server == 'true' - run: | - docker compose -f docker-compose.dev.yml down - docker compose -f docker-compose.dev.yml up -d - - - name: Add Kafka to /etc/hosts - if: needs.changes.outputs.plugin-server == 'true' - run: echo "127.0.0.1 kafka" | sudo tee -a /etc/hosts - - - name: Set up Python - if: needs.changes.outputs.plugin-server == 'true' - uses: actions/setup-python@v5 - with: - python-version: 3.11.9 - cache: 'pip' - cache-dependency-path: '**/requirements*.txt' - token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - - # uv is a fast pip alternative: https://github.com/astral-sh/uv/ - - run: pip install uv - - - name: Install rust - if: needs.changes.outputs.plugin-server == 'true' - uses: dtolnay/rust-toolchain@1.82 - - - uses: actions/cache@v4 - with: - path: | - ~/.cargo/registry - ~/.cargo/git - rust/target - key: ${{ runner.os }}-cargo-release-${{ hashFiles('**/Cargo.lock') }} - - - name: Install sqlx-cli - if: needs.changes.outputs.plugin-server == 'true' - working-directory: rust - run: cargo install sqlx-cli@0.7.3 --locked --no-default-features --features native-tls,postgres - - - name: Install SAML (python3-saml) dependencies - if: needs.changes.outputs.plugin-server == 'true' - run: | - sudo apt-get update - sudo apt-get install libxml2-dev libxmlsec1-dev libxmlsec1-openssl - - - name: Install python dependencies - if: needs.changes.outputs.plugin-server == 'true' - run: | - uv pip install --system -r requirements-dev.txt - uv pip install --system -r requirements.txt - - - name: Install pnpm - if: needs.changes.outputs.plugin-server == 'true' - uses: pnpm/action-setup@v4 - - - name: Set up Node.js - if: needs.changes.outputs.plugin-server == 'true' - uses: actions/setup-node@v4 - with: - node-version: 18.12.1 - cache: pnpm - cache-dependency-path: plugin-server/pnpm-lock.yaml - - - name: Install package.json dependencies with pnpm - if: needs.changes.outputs.plugin-server == 'true' - run: cd plugin-server && pnpm i - - - name: Wait for Clickhouse, Redis & Kafka - if: needs.changes.outputs.plugin-server == 'true' - run: | - docker compose -f docker-compose.dev.yml up 
kafka redis clickhouse -d --wait - bin/check_kafka_clickhouse_up - - - name: Set up databases - if: needs.changes.outputs.plugin-server == 'true' - env: - TEST: 'true' - SECRET_KEY: 'abcdef' # unsafe - for testing only - DATABASE_URL: 'postgres://posthog:posthog@localhost:5432/posthog' - run: cd plugin-server && pnpm setup:test - - - name: Test with Jest - if: needs.changes.outputs.plugin-server == 'true' - env: - # Below DB name has `test_` prepended, as that's how Django (ran above) creates the test DB - DATABASE_URL: 'postgres://posthog:posthog@localhost:5432/test_posthog' - REDIS_URL: 'redis://localhost' - NODE_OPTIONS: '--max_old_space_size=4096' - run: cd plugin-server && pnpm test -- --runInBand --forceExit tests/ --shard=${{matrix.shard}} - - functional-tests: - name: Functional tests - needs: changes - runs-on: ubuntu-24.04 - - env: - REDIS_URL: 'redis://localhost' - CLICKHOUSE_HOST: 'localhost' - CLICKHOUSE_DATABASE: 'posthog_test' - KAFKA_HOSTS: 'kafka:9092' - DATABASE_URL: 'postgres://posthog:posthog@localhost:5432/posthog' - RELOAD_PLUGIN_JITTER_MAX_MS: 0 - ENCRYPTION_SALT_KEYS: '00beef0000beef0000beef0000beef00' - - steps: - - name: Code check out - if: needs.changes.outputs.plugin-server == 'true' - uses: actions/checkout@v3 - - - name: Stop/Start stack with Docker Compose - if: needs.changes.outputs.plugin-server == 'true' - run: | - docker compose -f docker-compose.dev.yml down - docker compose -f docker-compose.dev.yml up -d - - - name: Add Kafka to /etc/hosts - if: needs.changes.outputs.plugin-server == 'true' - run: echo "127.0.0.1 kafka" | sudo tee -a /etc/hosts - - - name: Set up Python - if: needs.changes.outputs.plugin-server == 'true' - uses: actions/setup-python@v5 - with: - python-version: 3.11.9 - cache: 'pip' - cache-dependency-path: '**/requirements*.txt' - token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - - # uv is a fast pip alternative: https://github.com/astral-sh/uv/ - - run: pip install uv - - - name: Install SAML (python3-saml) dependencies - if: needs.changes.outputs.plugin-server == 'true' - run: | - sudo apt-get update - sudo apt-get install libxml2-dev libxmlsec1-dev libxmlsec1-openssl - - - name: Install python dependencies - if: needs.changes.outputs.plugin-server == 'true' - run: | - uv pip install --system -r requirements-dev.txt - uv pip install --system -r requirements.txt - - - name: Install pnpm - if: needs.changes.outputs.plugin-server == 'true' - uses: pnpm/action-setup@v4 - - - name: Set up Node.js - if: needs.changes.outputs.plugin-server == 'true' - uses: actions/setup-node@v4 - with: - node-version: 18.12.1 - cache: pnpm - cache-dependency-path: plugin-server/pnpm-lock.yaml - - - name: Install package.json dependencies with pnpm - if: needs.changes.outputs.plugin-server == 'true' - run: | - cd plugin-server - pnpm install --frozen-lockfile - pnpm build - - - name: Wait for Clickhouse, Redis & Kafka - if: needs.changes.outputs.plugin-server == 'true' - run: | - docker compose -f docker-compose.dev.yml up kafka redis clickhouse -d --wait - bin/check_kafka_clickhouse_up - - - name: Set up databases - if: needs.changes.outputs.plugin-server == 'true' - env: - DEBUG: 'true' - SECRET_KEY: 'abcdef' # unsafe - for testing only - run: | - ./manage.py migrate - ./manage.py migrate_clickhouse - - - name: Run functional tests - if: needs.changes.outputs.plugin-server == 'true' - run: | - cd plugin-server - ./bin/ci_functional_tests.sh - - - name: Upload coverage report - uses: actions/upload-artifact@v4 - if: needs.changes.outputs.plugin-server == 
'true' - with: - name: functional-coverage - if-no-files-found: warn - retention-days: 1 - path: 'plugin-server/coverage' diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml deleted file mode 100644 index 56f1ed3bf0..0000000000 --- a/.github/workflows/codeql.yml +++ /dev/null @@ -1,105 +0,0 @@ -# For most projects, this workflow file will not need changing; you simply need -# to commit it to your repository. -# -# You may wish to alter this file to override the set of languages analyzed, -# or to provide custom queries or build logic. -# -# ******** NOTE ******** -# We have attempted to detect the languages in your repository. Please check -# the `language` matrix defined below to confirm you have the correct set of -# supported CodeQL languages. -# -name: 'CodeQL' - -on: - push: - branches: ['master'] - paths-ignore: - - 'rust/**' - pull_request: - branches: ['master'] - paths-ignore: - - 'rust/**' - schedule: - - cron: '27 1 * * 0' - -jobs: - analyze: - name: Analyze (${{ matrix.language }}) - # Runner size impacts CodeQL analysis time. To learn more, please see: - # - https://gh.io/recommended-hardware-resources-for-running-codeql - # - https://gh.io/supported-runners-and-hardware-resources - # - https://gh.io/using-larger-runners (GitHub.com only) - # Consider using larger runners or machines with greater resources for possible analysis time improvements. - runs-on: 'ubuntu-24.04' - timeout-minutes: 15 - permissions: - # required for all workflows - security-events: write - - # required to fetch internal or private CodeQL packs - packages: read - - # only required for workflows in private repositories - actions: read - contents: read - - strategy: - fail-fast: false - matrix: - include: - # TODO: Add cpp with manual build mode when we need it. - # needs updating of manual build instructions below - # - language: c-cpp - # build-mode: manual - - language: javascript-typescript - build-mode: none - - language: python - build-mode: none - - language: go - build-mode: autobuild - # CodeQL supports the following values keywords for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' - # Use `c-cpp` to analyze code written in C, C++ or both - # Use 'java-kotlin' to analyze code written in Java, Kotlin or both - # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both - # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, - # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. - # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how - # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - # Initializes the CodeQL tools for scanning. - - name: Initialize CodeQL - uses: github/codeql-action/init@v3 - with: - languages: ${{ matrix.language }} - build-mode: ${{ matrix.build-mode }} - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. 
- - # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs - # queries: security-extended,security-and-quality - - # If the analyze step fails for one of the languages you are analyzing with - # "We were unable to automatically build your code", modify the matrix above - # to set the build mode to "manual" for that language. Then modify this step - # to build your code. - # ℹ️ Command-line programs to run using the OS shell. - # πŸ“š See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun - - if: matrix.build-mode == 'manual' - shell: bash - run: | - echo 'If you are using a "manual" build mode for one or more of the' \ - 'languages you are analyzing, replace this with the commands to build' \ - 'your code, for example:' - echo ' make bootstrap' - echo ' make release' - exit 1 - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v3 - with: - category: '/language:${{matrix.language}}' diff --git a/.github/workflows/codespaces.yml b/.github/workflows/codespaces.yml deleted file mode 100644 index 06f796951d..0000000000 --- a/.github/workflows/codespaces.yml +++ /dev/null @@ -1,92 +0,0 @@ -name: GitHub Codespaces image build - -#Β Run only on master branch. We could also build on branch but this seems like -#Β an optimization that can be done as and when desired. The main use case we're -#Β handling is creating and working off a branch from master, so it doesn't seem -#Β like an immediate requirement to have branches as well. -# -# NOTE: the job is setup to also push branch images as well, and using branch -#Β and master as caching, so if we want to add the optimisation for branches we -#Β can just remove the master branch restriction. -on: - push: - branches: - - master - pull_request: - types: - - opened - - labeled - - synchronize - -jobs: - build: - name: Build Codespaces image - runs-on: ubuntu-24.04 - - # Build on master and PRs with the label 'codespaces-build' only - if: ${{ github.ref == 'refs/heads/master' || contains(github.event.pull_request.labels.*.name, 'codespaces-build') }} - - steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 1 - - - name: Lowercase GITHUB_REPOSITORY - id: lowercase - run: | - echo "repository=${GITHUB_REPOSITORY,,}" >> "$GITHUB_OUTPUT" - - # As ghcr.io complains if the image has upper case letters, we use - # this action to ensure we get a lower case version. See - # https://github.com/docker/build-push-action/issues/237#issuecomment-848673650 - # for more details - - name: Docker image metadata - id: meta - uses: docker/metadata-action@v5 - with: - images: ghcr.io/${{ steps.lowercase.outputs.repository }}/codespaces - tags: | - type=ref,event=branch - type=raw,value=master - - # We also want to use cache-from when building, but we want to also - # include the master tag so we get the master branch image as well. 
- # This creates a scope similar to the github cache action scoping - - name: Docker cache-from/cache-to metadata - id: meta-for-cache - uses: docker/metadata-action@v5 - with: - images: ghcr.io/${{ steps.lowercase.outputs.repository }}/codespaces - tags: | - type=raw,value=master - - # Install QEMU so we can target x86_64 (github codespaces) - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - - name: Login to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Build and push - uses: docker/build-push-action@v2 - with: - context: . - file: .devcontainer/Dockerfile - push: true - platforms: x86_64 - # Cache from this branch, or master - cache-from: ${{ steps.meta-for-cache.outputs.tags }} - # NOTE: we use inline as suggested here: - # https://github.com/docker/build-push-action/blob/master/docs/advanced/cache.md#inline-cache - # It notes that it doesn't support mode=max, but we're not - # removing any layers, soooo, maybe it's fine. - cache-to: type=inline - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} diff --git a/.github/workflows/container-images-cd.yml b/.github/workflows/container-images-cd.yml deleted file mode 100644 index c5cacd1dec..0000000000 --- a/.github/workflows/container-images-cd.yml +++ /dev/null @@ -1,250 +0,0 @@ -# -# Build and push PostHog and PostHog Cloud container images -# -# - posthog_build: build and push the PostHog container image to DockerHub -# -# - posthog_cloud_build: build the PostHog Cloud container image using -# as base image the container image from the previous step. The image is -# then pushed to AWS ECR. 
-# -name: Container Images CD - -on: - push: - branches: - - master - paths-ignore: - - 'rust/**' - - 'livestream/**' - workflow_dispatch: - -jobs: - posthog_build: - name: Build and push PostHog - if: github.repository == 'PostHog/posthog' - runs-on: ubuntu-24.04 - permissions: - id-token: write # allow issuing OIDC tokens for this workflow run - contents: read # allow at least reading the repo contents, add other permissions if necessary - packages: write # allow push to ghcr.io - - steps: - - name: Check out - uses: actions/checkout@v4 - with: - fetch-depth: 2 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: Set up Depot CLI - uses: depot/setup-action@v1 - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: us-east-1 - - - name: Login to Amazon ECR - id: aws-ecr - uses: aws-actions/amazon-ecr-login@v2 - - - name: Login to DockerHub - uses: docker/login-action@v3 - with: - username: ${{ secrets.DOCKERHUB_USER }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Build and push container image - id: build - uses: depot/build-push-action@v1 - with: - buildx-fallback: false # the fallback is so slow it's better to just fail - push: true - tags: posthog/posthog:${{ github.sha }},posthog/posthog:latest,${{ steps.aws-ecr.outputs.registry }}/posthog-cloud:master - platforms: linux/arm64,linux/amd64 - build-args: COMMIT_HASH=${{ github.sha }} - - - name: get deployer token - id: deployer - uses: getsentry/action-github-app-token@v3 - with: - app_id: ${{ secrets.DEPLOYER_APP_ID }} - private_key: ${{ secrets.DEPLOYER_APP_PRIVATE_KEY }} - - - name: get PR labels - id: labels - uses: ./.github/actions/get-pr-labels - with: - token: ${{ secrets.GITHUB_TOKEN }} - - - name: Trigger PostHog Cloud deployment from Charts - uses: peter-evans/repository-dispatch@v3 - with: - token: ${{ steps.deployer.outputs.token }} - repository: PostHog/charts - event-type: commit_state_update - client-payload: | - { - "values": { - "image": { - "sha": "${{ steps.build.outputs.digest }}" - } - }, - "release": "posthog", - "commit": ${{ toJson(github.event.head_commit) }}, - "repository": ${{ toJson(github.repository) }}, - "labels": ${{ steps.labels.outputs.labels }}, - "timestamp": "${{ github.event.head_commit.timestamp }}" - } - - - name: Check for changes in plugins directory - id: check_changes_plugins - run: | - echo "changed=$((git diff --name-only HEAD^ HEAD | grep -q '^plugin-server/' && echo true) || echo false)" >> $GITHUB_OUTPUT - - - name: Trigger Ingestion Cloud deployment - if: steps.check_changes_plugins.outputs.changed == 'true' - uses: peter-evans/repository-dispatch@v3 - with: - token: ${{ steps.deployer.outputs.token }} - repository: PostHog/charts - event-type: commit_state_update - client-payload: | - { - "values": { - "image": { - "sha": "${{ steps.build.outputs.digest }}" - } - }, - "release": "ingestion", - "commit": ${{ toJson(github.event.head_commit) }}, - "repository": ${{ toJson(github.repository) }}, - "labels": ${{ toJson(steps.labels.outputs.labels) }}, - "timestamp": "${{ github.event.head_commit.timestamp }}" - } - - - name: Check for changes that affect batch exports temporal worker - id: check_changes_batch_exports_temporal_worker - run: | - echo "changed=$((git diff --name-only HEAD^ HEAD | grep -qE 
'^posthog/temporal/common|^posthog/temporal/batch_exports|^posthog/batch_exports/|^posthog/management/commands/start_temporal_worker.py$|^requirements.txt$' && echo true) || echo false)" >> $GITHUB_OUTPUT - - - name: Trigger Batch Exports Sync Temporal Worker Cloud deployment - if: steps.check_changes_batch_exports_temporal_worker.outputs.changed == 'true' - uses: peter-evans/repository-dispatch@v3 - with: - token: ${{ steps.deployer.outputs.token }} - repository: PostHog/charts - event-type: commit_state_update - client-payload: | - { - "values": { - "image": { - "sha": "${{ steps.build.outputs.digest }}" - } - }, - "release": "temporal-worker", - "commit": ${{ toJson(github.event.head_commit) }}, - "repository": ${{ toJson(github.repository) }}, - "labels": ${{ steps.labels.outputs.labels }}, - "timestamp": "${{ github.event.head_commit.timestamp }}" - } - - - name: Trigger Batch Exports Temporal Worker Cloud deployment - if: steps.check_changes_batch_exports_temporal_worker.outputs.changed == 'true' - uses: peter-evans/repository-dispatch@v3 - with: - token: ${{ steps.deployer.outputs.token }} - repository: PostHog/charts - event-type: commit_state_update - client-payload: | - { - "values": { - "image": { - "sha": "${{ steps.build.outputs.digest }}" - } - }, - "release": "temporal-worker-batch-exports", - "commit": ${{ toJson(github.event.head_commit) }}, - "repository": ${{ toJson(github.repository) }}, - "labels": ${{ steps.labels.outputs.labels }}, - "timestamp": "${{ github.event.head_commit.timestamp }}" - } - - - name: Check for changes that affect general purpose temporal worker - id: check_changes_general_purpose_temporal_worker - run: | - echo "changed=$((git diff --name-only HEAD^ HEAD | grep -qE '^posthog/temporal/common|^posthog/temporal/proxy_service|^posthog/management/commands/start_temporal_worker.py$|^requirements.txt$' && echo true) || echo false)" >> $GITHUB_OUTPUT - - - name: Trigger General Purpose Temporal Worker Cloud deployment - if: steps.check_changes_general_purpose_temporal_worker.outputs.changed == 'true' - uses: peter-evans/repository-dispatch@v3 - with: - token: ${{ steps.deployer.outputs.token }} - repository: PostHog/charts - event-type: commit_state_update - client-payload: | - { - "values": { - "image": { - "sha": "${{ steps.build.outputs.digest }}" - } - }, - "release": "temporal-worker-general-purpose", - "commit": ${{ toJson(github.event.head_commit) }}, - "repository": ${{ toJson(github.repository) }}, - "labels": ${{ steps.labels.outputs.labels }}, - "timestamp": "${{ github.event.head_commit.timestamp }}" - } - - - name: Check for changes that affect data warehouse temporal worker - id: check_changes_data_warehouse_temporal_worker - run: | - echo "changed=$((git diff --name-only HEAD^ HEAD | grep -qE '^posthog/temporal/common|^posthog/temporal/data_imports|^posthog/warehouse/|^posthog/management/commands/start_temporal_worker.py$|^requirements.txt$' && echo true) || echo false)" >> $GITHUB_OUTPUT - - - name: Trigger Data Warehouse Temporal Worker Cloud deployment - if: steps.check_changes_data_warehouse_temporal_worker.outputs.changed == 'true' - uses: peter-evans/repository-dispatch@v3 - with: - token: ${{ steps.deployer.outputs.token }} - repository: PostHog/charts - event-type: commit_state_update - client-payload: | - { - "values": { - "image": { - "sha": "${{ steps.build.outputs.digest }}" - } - }, - "release": "temporal-worker-data-warehouse", - "commit": ${{ toJson(github.event.head_commit) }}, - "repository": ${{ toJson(github.repository) 
}}, - "labels": ${{ steps.labels.outputs.labels }}, - "timestamp": "${{ github.event.head_commit.timestamp }}" - } - - - name: Trigger Data Warehouse V2 Temporal Worker Cloud deployment - if: steps.check_changes_data_warehouse_temporal_worker.outputs.changed == 'true' - uses: peter-evans/repository-dispatch@v3 - with: - token: ${{ steps.deployer.outputs.token }} - repository: PostHog/charts - event-type: commit_state_update - client-payload: | - { - "values": { - "image": { - "sha": "${{ steps.build.outputs.digest }}" - } - }, - "release": "temporal-worker-data-warehouse-v2", - "commit": ${{ toJson(github.event.head_commit) }}, - "repository": ${{ toJson(github.repository) }}, - "labels": ${{ steps.labels.outputs.labels }}, - "timestamp": "${{ github.event.head_commit.timestamp }}" - } diff --git a/.github/workflows/container-images-ci.yml b/.github/workflows/container-images-ci.yml deleted file mode 100644 index 7b434a7cb5..0000000000 --- a/.github/workflows/container-images-ci.yml +++ /dev/null @@ -1,75 +0,0 @@ -name: Container Images CI - -on: - pull_request: - paths-ignore: - - 'rust/**' - - 'livestream/**' - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - posthog_build: - name: Build Docker image - runs-on: ubuntu-24.04 - permissions: - id-token: write # allow issuing OIDC tokens for this workflow run - contents: read # allow at least reading the repo contents, add other permissions if necessary - - steps: - # If this run wasn't initiated by PostHog Bot (meaning: snapshot update), - # cancel previous runs of snapshot update-inducing workflows - - - uses: n1hility/cancel-previous-runs@v3 - if: github.actor != 'posthog-bot' - with: - token: ${{ secrets.GITHUB_TOKEN }} - workflow: .github/workflows/storybook-chromatic.yml - - - uses: n1hility/cancel-previous-runs@v3 - if: github.actor != 'posthog-bot' - with: - token: ${{ secrets.GITHUB_TOKEN }} - workflow: .github/workflows/ci-backend.yml - - - name: Check out - uses: actions/checkout@v3 - - - name: Build and cache Docker image in Depot - uses: ./.github/actions/build-n-cache-image - with: - actions-id-token-request-url: ${{ env.ACTIONS_ID_TOKEN_REQUEST_URL }} - - deploy_preview: - name: Deploy preview environment - uses: ./.github/workflows/pr-deploy.yml - needs: [posthog_build] - secrets: inherit - if: ${{ contains(github.event.pull_request.labels.*.name, 'deploy') }} - - lint: - name: Lint changed Dockerfiles - runs-on: ubuntu-24.04 - steps: - - name: Check out - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: Check if any Dockerfile has changed - id: changed-files - uses: tj-actions/changed-files@v43 - with: - files: | - **/Dockerfile - **/*.Dockerfile - **/Dockerfile.* - separator: ' ' - - - name: Lint changed Dockerfile(s) with Hadolint - uses: jbergstroem/hadolint-gh-action@v1 - if: steps.changed-files.outputs.any_changed == 'true' - with: - dockerfile: '${{ steps.changed-files.outputs.all_modified_files }}' diff --git a/.github/workflows/copy-clickhouse-udfs.yml b/.github/workflows/copy-clickhouse-udfs.yml deleted file mode 100644 index b55d66bc30..0000000000 --- a/.github/workflows/copy-clickhouse-udfs.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: Trigger UDFs Workflow - -on: - push: - branches: - - master - paths: - - 'posthog/user_scripts/**' - -jobs: - trigger_udfs_workflow: - runs-on: ubuntu-24.04 - steps: - - name: Trigger UDFs Workflow - uses: benc-uk/workflow-dispatch@v1 - with: - workflow: .github/workflows/clickhouse-udfs.yml - repo: 
posthog/posthog-cloud-infra - token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - ref: refs/heads/main diff --git a/.github/workflows/foss-sync.yml b/.github/workflows/foss-sync.yml deleted file mode 100644 index b8edfe6321..0000000000 --- a/.github/workflows/foss-sync.yml +++ /dev/null @@ -1,52 +0,0 @@ -name: Sync PostHog FOSS - -on: - push: - branches: - - master - - main - -jobs: - repo-sync: - name: Sync posthog-foss with posthog - if: github.repository == 'PostHog/posthog' - runs-on: ubuntu-24.04 - steps: - - name: Sync repositories 1 to 1 - master branch - uses: PostHog/git-sync@v3 - with: - source_repo: 'https://posthog-bot:${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }}@github.com/posthog/posthog.git' - source_branch: 'master' - destination_repo: 'https://posthog-bot:${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }}@github.com/posthog/posthog-foss.git' - destination_branch: 'master' - - name: Sync repositories 1 to 1 – tags - uses: PostHog/git-sync@v3 - with: - source_repo: 'https://posthog-bot:${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }}@github.com/posthog/posthog.git' - source_branch: 'refs/tags/*' - destination_repo: 'https://posthog-bot:${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }}@github.com/posthog/posthog-foss.git' - destination_branch: 'refs/tags/*' - - name: Checkout posthog-foss - uses: actions/checkout@v3 - with: - repository: 'posthog/posthog-foss' - ref: master - token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - - name: Change LICENSE to pure MIT - run: | - sed -i -e '/PostHog Inc\./,/Permission is hereby granted/c\Copyright (c) 2020-2021 PostHog Inc\.\n\nPermission is hereby granted, free of charge, to any person obtaining a copy' LICENSE - echo -e "MIT License\n\n$(cat LICENSE)" > LICENSE - - name: Remove unused GitHub workflows - run: | - cd .github/workflows - ls | grep -v foss-release-image-publish.yml | xargs rm - - - name: Commit "Sync and remove all non-FOSS parts" - uses: EndBug/add-and-commit@v9 - with: - message: 'Sync and remove all non-FOSS parts' - remove: '["-r ee/"]' - default_author: github_actions - github_token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} - repository: 'posthog/posthog-foss' - ref: master diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml deleted file mode 100644 index 4fc7344a97..0000000000 --- a/.github/workflows/go.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: Go Test (for livestream service) - -on: - pull_request: - paths: - - 'livestream/**' - -jobs: - test: - runs-on: ubuntu-24.04 - - steps: - - name: Checkout code - uses: actions/checkout@v2 - - - name: Set up Go - uses: actions/setup-go@v2 - with: - go-version: 1.22 - - - name: Run tests - run: cd livestream && go test -v diff --git a/.github/workflows/lint-new-pr.yml b/.github/workflows/lint-new-pr.yml deleted file mode 100644 index 7db972a9bc..0000000000 --- a/.github/workflows/lint-new-pr.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: Lint new PR - -on: - pull_request: - types: [opened, ready_for_review] - -jobs: - check-description: - name: Check that PR has description - runs-on: ubuntu-20.04 - if: github.event.pull_request.draft == false - - steps: - - name: Check if PR is shame-worthy - id: is-shame-worthy - run: | - FILTERED_BODY=$( \ - sed -r -e \ - '/^(\.\.\.)|(\*)|(#+ )|( - -SVG - } - - sub include { - my ($self, $content) = @_; - $self->{svg} .= $content; - } - - sub colorAllocate { - my ($self, $r, $g, $b) = @_; - return "rgb($r,$g,$b)"; - } - - sub group_start { - my ($self, $attr) = @_; - - my @g_attr = map { - exists $attr->{$_} ? 
sprintf(qq/$_="%s"/, $attr->{$_}) : () - } qw(id class); - push @g_attr, $attr->{g_extra} if $attr->{g_extra}; - if ($attr->{href}) { - my @a_attr; - push @a_attr, sprintf qq/xlink:href="%s"/, $attr->{href} if $attr->{href}; - # default target=_top else links will open within SVG - push @a_attr, sprintf qq/target="%s"/, $attr->{target} || "_top"; - push @a_attr, $attr->{a_extra} if $attr->{a_extra}; - $self->{svg} .= sprintf qq/\n/, join(' ', (@a_attr, @g_attr)); - } else { - $self->{svg} .= sprintf qq/\n/, join(' ', @g_attr); - } - - $self->{svg} .= sprintf qq/%s<\/title>/, $attr->{title} - if $attr->{title}; # should be first element within g container - } - - sub group_end { - my ($self, $attr) = @_; - $self->{svg} .= $attr->{href} ? qq/<\/a>\n/ : qq/<\/g>\n/; - } - - sub filledRectangle { - my ($self, $x1, $y1, $x2, $y2, $fill, $extra) = @_; - $x1 = sprintf "%0.1f", $x1; - $x2 = sprintf "%0.1f", $x2; - my $w = sprintf "%0.1f", $x2 - $x1; - my $h = sprintf "%0.1f", $y2 - $y1; - $extra = defined $extra ? $extra : ""; - $self->{svg} .= qq/<rect x="$x1" y="$y1" width="$w" height="$h" fill="$fill" $extra \/>\n/; - } - - sub stringTTF { - my ($self, $id, $x, $y, $str, $extra) = @_; - $x = sprintf "%0.2f", $x; - $id = defined $id ? qq/id="$id"/ : ""; - $extra ||= ""; - $self->{svg} .= qq/<text $id x="$x" y="$y" $extra>$str<\/text>\n/; - } - - sub svg { - my $self = shift; - return "$self->{svg}</svg>\n"; - } - 1; -} - -sub namehash { - # Generate a vector hash for the name string, weighting early over - # later characters. We want to pick the same colors for function - # names across different flame graphs. - my $name = shift; - my $vector = 0; - my $weight = 1; - my $max = 1; - my $mod = 10; - # if module name present, trunc to 1st char - $name =~ s/.(.*?)`//; - foreach my $c (split //, $name) { - my $i = (ord $c) % $mod; - $vector += ($i / ($mod++ - 1)) * $weight; - $max += 1 * $weight; - $weight *= 0.70; - last if $mod > 12; - } - return (1 - $vector / $max) -} - -sub color { - my ($type, $hash, $name) = @_; - my ($v1, $v2, $v3); - - if ($hash) { - $v1 = namehash($name); - $v2 = $v3 = namehash(scalar reverse $name); - } else { - $v1 = rand(1); - $v2 = rand(1); - $v3 = rand(1); - } - - # theme palettes - if (defined $type and $type eq "hot") { - my $r = 205 + int(50 * $v3); - my $g = 0 + int(230 * $v1); - my $b = 0 + int(55 * $v2); - return "rgb($r,$g,$b)"; - } - if (defined $type and $type eq "mem") { - my $r = 0; - my $g = 190 + int(50 * $v2); - my $b = 0 + int(210 * $v1); - return "rgb($r,$g,$b)"; - } - if (defined $type and $type eq "io") { - my $r = 80 + int(60 * $v1); - my $g = $r; - my $b = 190 + int(55 * $v2); - return "rgb($r,$g,$b)"; - } - - # multi palettes - if (defined $type and $type eq "java") { - # Handle both annotations (_[j], _[i], ...; which are - # accurate), as well as input that lacks any annotations, as - # best as possible. Without annotations, we get a little hacky - # and match on java|org|com, etc. 
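The namehash/colour pairing above is what keeps a given function the same colour across different flame graphs. A minimal Python sketch of that idea for the "hot" palette, assuming the same constants as the Perl above (the per-character weight decays by 0.70 and the loop stops after a few characters, so early characters dominate); the annotation-based Java/JS/Perl handling described in the comment above then only picks a palette name before falling through to colour formulas like this one:

    def namehash(name: str) -> float:
        # Weighted hash in [0, 1); early characters dominate, so the same
        # function name maps to the same colour across flame graphs.
        if "`" in name:                        # drop a "module`" prefix, as the Perl does
            name = name.split("`", 1)[1]
        vector, weight, maximum, mod = 0.0, 1.0, 1.0, 10
        for ch in name:
            i = ord(ch) % mod
            vector += (i / (mod - 1)) * weight
            mod += 1
            maximum += weight
            weight *= 0.70
            if mod > 12:
                break
        return 1 - vector / maximum

    def hot_color(name: str) -> str:
        # "hot" palette: mostly red, with green/blue varied by the hash.
        v1 = namehash(name)
        v2 = namehash(name[::-1])              # hash of the reversed name (v2 == v3 in the Perl)
        return f"rgb({205 + int(50 * v2)},{int(230 * v1)},{int(55 * v2)})"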
- if ($name =~ m:_\[j\]$:) { # jit annotation - $type = "green"; - } elsif ($name =~ m:_\[i\]$:) { # inline annotation - $type = "aqua"; - } elsif ($name =~ m:^L?(java|javax|jdk|net|org|com|io|sun)/:) { # Java - $type = "green"; - } elsif ($name =~ m:_\[k\]$:) { # kernel annotation - $type = "orange"; - } elsif ($name =~ /::/) { # C++ - $type = "yellow"; - } else { # system - $type = "red"; - } - # fall-through to color palettes - } - if (defined $type and $type eq "perl") { - if ($name =~ /::/) { # C++ - $type = "yellow"; - } elsif ($name =~ m:Perl: or $name =~ m:\.pl:) { # Perl - $type = "green"; - } elsif ($name =~ m:_\[k\]$:) { # kernel - $type = "orange"; - } else { # system - $type = "red"; - } - # fall-through to color palettes - } - if (defined $type and $type eq "js") { - # Handle both annotations (_[j], _[i], ...; which are - # accurate), as well as input that lacks any annotations, as - # best as possible. Without annotations, we get a little hacky, - # and match on a "/" with a ".js", etc. - if ($name =~ m:_\[j\]$:) { # jit annotation - if ($name =~ m:/:) { - $type = "green"; # source - } else { - $type = "aqua"; # builtin - } - } elsif ($name =~ /::/) { # C++ - $type = "yellow"; - } elsif ($name =~ m:/.*\.js:) { # JavaScript (match "/" in path) - $type = "green"; - } elsif ($name =~ m/:/) { # JavaScript (match ":" in builtin) - $type = "aqua"; - } elsif ($name =~ m/^ $/) { # Missing symbol - $type = "green"; - } elsif ($name =~ m:_\[k\]:) { # kernel - $type = "orange"; - } else { # system - $type = "red"; - } - # fall-through to color palettes - } - if (defined $type and $type eq "wakeup") { - $type = "aqua"; - # fall-through to color palettes - } - if (defined $type and $type eq "chain") { - if ($name =~ m:_\[w\]:) { # waker - $type = "aqua" - } else { # off-CPU - $type = "blue"; - } - # fall-through to color palettes - } - - # color palettes - if (defined $type and $type eq "red") { - my $r = 200 + int(55 * $v1); - my $x = 50 + int(80 * $v1); - return "rgb($r,$x,$x)"; - } - if (defined $type and $type eq "green") { - my $g = 200 + int(55 * $v1); - my $x = 50 + int(60 * $v1); - return "rgb($x,$g,$x)"; - } - if (defined $type and $type eq "blue") { - my $b = 205 + int(50 * $v1); - my $x = 80 + int(60 * $v1); - return "rgb($x,$x,$b)"; - } - if (defined $type and $type eq "yellow") { - my $x = 175 + int(55 * $v1); - my $b = 50 + int(20 * $v1); - return "rgb($x,$x,$b)"; - } - if (defined $type and $type eq "purple") { - my $x = 190 + int(65 * $v1); - my $g = 80 + int(60 * $v1); - return "rgb($x,$g,$x)"; - } - if (defined $type and $type eq "aqua") { - my $r = 50 + int(60 * $v1); - my $g = 165 + int(55 * $v1); - my $b = 165 + int(55 * $v1); - return "rgb($r,$g,$b)"; - } - if (defined $type and $type eq "orange") { - my $r = 190 + int(65 * $v1); - my $g = 90 + int(65 * $v1); - return "rgb($r,$g,0)"; - } - - return "rgb(0,0,0)"; -} - -sub color_scale { - my ($value, $max) = @_; - my ($r, $g, $b) = (255, 255, 255); - $value = -$value if $negate; - if ($value > 0) { - $g = $b = int(210 * ($max - $value) / $max); - } elsif ($value < 0) { - $r = $g = int(210 * ($max + $value) / $max); - } - return "rgb($r,$g,$b)"; -} - -sub color_map { - my ($colors, $func) = @_; - if (exists $palette_map{$func}) { - return $palette_map{$func}; - } else { - $palette_map{$func} = color($colors, $hash, $func); - return $palette_map{$func}; - } -} - -sub write_palette { - open(FILE, ">$pal_file"); - foreach my $key (sort keys %palette_map) { - print FILE $key."->".$palette_map{$key}."\n"; - } - 
close(FILE); -} - -sub read_palette { - if (-e $pal_file) { - open(FILE, $pal_file) or die "can't open file $pal_file: $!"; - while ( my $line = <FILE>) { - chomp($line); - (my $key, my $value) = split("->",$line); - $palette_map{$key}=$value; - } - close(FILE) - } -} - -my %Node; # Hash of merged frame data -my %Tmp; - -# flow() merges two stacks, storing the merged frames and value data in %Node. -sub flow { - my ($last, $this, $v, $d) = @_; - - my $len_a = @$last - 1; - my $len_b = @$this - 1; - - my $i = 0; - my $len_same; - for (; $i <= $len_a; $i++) { - last if $i > $len_b; - last if $last->[$i] ne $this->[$i]; - } - $len_same = $i; - - for ($i = $len_a; $i >= $len_same; $i--) { - my $k = "$last->[$i];$i"; - # a unique ID is constructed from "func;depth;etime"; - # func-depth isn't unique, it may be repeated later. - $Node{"$k;$v"}->{stime} = delete $Tmp{$k}->{stime}; - if (defined $Tmp{$k}->{delta}) { - $Node{"$k;$v"}->{delta} = delete $Tmp{$k}->{delta}; - } - delete $Tmp{$k}; - } - - for ($i = $len_same; $i <= $len_b; $i++) { - my $k = "$this->[$i];$i"; - $Tmp{$k}->{stime} = $v; - if (defined $d) { - $Tmp{$k}->{delta} += $i == $len_b ? $d : 0; - } - } - - return $this; -} - -# parse input -my @Data; -my @SortedData; -my $last = []; -my $time = 0; -my $delta = undef; -my $ignored = 0; -my $line; -my $maxdelta = 1; - -# reverse if needed -foreach (<>) { - chomp; - $line = $_; - if ($stackreverse) { - # there may be an extra samples column for differentials - # XXX todo: redo these REs as one. It's repeated below. - my($stack, $samples) = (/^(.*)\s+?(\d+(?:\.\d*)?)$/); - my $samples2 = undef; - if ($stack =~ /^(.*)\s+?(\d+(?:\.\d*)?)$/) { - $samples2 = $samples; - ($stack, $samples) = $stack =~ (/^(.*)\s+?(\d+(?:\.\d*)?)$/); - unshift @Data, join(";", reverse split(";", $stack)) . " $samples $samples2"; - } else { - unshift @Data, join(";", reverse split(";", $stack)) . " $samples"; - } - } else { - unshift @Data, $line; - } -} - -if ($flamechart) { - # In flame chart mode, just reverse the data so time moves from left to right. - @SortedData = reverse @Data; -} else { - @SortedData = sort @Data; -} - -# process and merge frames -foreach (@SortedData) { - chomp; - # process: folded_stack count - # eg: func_a;func_b;func_c 31 - my ($stack, $samples) = (/^(.*)\s+?(\d+(?:\.\d*)?)$/); - unless (defined $samples and defined $stack) { - ++$ignored; - next; - } - - # there may be an extra samples column for differentials: - my $samples2 = undef; - if ($stack =~ /^(.*)\s+?(\d+(?:\.\d*)?)$/) { - $samples2 = $samples; - ($stack, $samples) = $stack =~ (/^(.*)\s+?(\d+(?:\.\d*)?)$/); - } - $delta = undef; - if (defined $samples2) { - $delta = $samples2 - $samples; - $maxdelta = abs($delta) if abs($delta) > $maxdelta; - } - - # for chain graphs, annotate waker frames with "_[w]", for later - # coloring. This is a hack, but has a precedent ("_[k]" from perf). 
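flow() above is the heart of the folded-stack merge: it compares the previous stack with the current one, closes every frame that fell off (pairing the start time stashed in %Tmp with the current total as the end time) and opens every frame that is new. A minimal Python sketch of the same bookkeeping, assuming plain "stack count" input and ignoring the optional differential column and the chain/waker annotation handled just below:

    def flow(nodes, tmp, last, this, v):
        # Length of the prefix shared by the previous and current stacks.
        same = 0
        while same < len(last) and same < len(this) and last[same] == this[same]:
            same += 1
        # Close frames that ended: deepest first, end time is the running total v.
        for depth in range(len(last) - 1, same - 1, -1):
            func = last[depth]
            nodes[(func, depth, v)] = {"stime": tmp.pop((func, depth))}
        # Open frames that just started.
        for depth in range(same, len(this)):
            tmp[(this[depth], depth)] = v
        return this

    def merge_folded(sorted_lines):
        # sorted_lines: iterable of "func_a;func_b;func_c 31" strings, pre-sorted.
        nodes, tmp, last, time = {}, {}, [], 0
        for line in sorted_lines:
            stack, count = line.rsplit(" ", 1)
            last = flow(nodes, tmp, last, [""] + stack.split(";"), time)  # "" = synthetic root
            time += int(count)
        flow(nodes, tmp, last, [], time)  # close everything still open
        return nodes, time

Each entry of nodes then corresponds to one rectangle: function name, depth, and start/end time.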
- if ($colors eq "chain") { - my @parts = split ";--;", $stack; - my @newparts = (); - $stack = shift @parts; - $stack .= ";--;"; - foreach my $part (@parts) { - $part =~ s/;/_[w];/g; - $part .= "_[w]"; - push @newparts, $part; - } - $stack .= join ";--;", @parts; - } - - # merge frames and populate %Node: - $last = flow($last, [ '', split ";", $stack ], $time, $delta); - - if (defined $samples2) { - $time += $samples2; - } else { - $time += $samples; - } -} -flow($last, [], $time, $delta); - -warn "Ignored $ignored lines with invalid format\n" if $ignored; -unless ($time) { - warn "ERROR: No stack counts found\n"; - my $im = SVG->new(); - # emit an error message SVG, for tools automating flamegraph use - my $imageheight = $fontsize * 5; - $im->header($imagewidth, $imageheight); - $im->stringTTF(undef, int($imagewidth / 2), $fontsize * 2, - "ERROR: No valid input provided to flamegraph.pl."); - print $im->svg; - exit 2; -} -if ($timemax and $timemax < $time) { - warn "Specified --total $timemax is less than actual total $time, so ignored\n" - if $timemax/$time > 0.02; # only warn is significant (e.g., not rounding etc) - undef $timemax; -} -$timemax ||= $time; - -my $widthpertime = ($imagewidth - 2 * $xpad) / $timemax; -my $minwidth_time = $minwidth / $widthpertime; - -# prune blocks that are too narrow and determine max depth -while (my ($id, $node) = each %Node) { - my ($func, $depth, $etime) = split ";", $id; - my $stime = $node->{stime}; - die "missing start for $id" if not defined $stime; - - if (($etime-$stime) < $minwidth_time) { - delete $Node{$id}; - next; - } - $depthmax = $depth if $depth > $depthmax; -} - -# draw canvas, and embed interactive JavaScript program -my $imageheight = (($depthmax + 1) * $frameheight) + $ypad1 + $ypad2; -$imageheight += $ypad3 if $subtitletext ne ""; -my $titlesize = $fontsize + 5; -my $im = SVG->new(); -my ($black, $vdgrey, $dgrey) = ( - $im->colorAllocate(0, 0, 0), - $im->colorAllocate(160, 160, 160), - $im->colorAllocate(200, 200, 200), - ); -$im->header($imagewidth, $imageheight); -my $inc = <<INC; -<defs> - <linearGradient id="background" y1="0" y2="1" x1="0" x2="0" > - <stop stop-color="$bgcolor1" offset="5%" /> - <stop stop-color="$bgcolor2" offset="95%" /> - </linearGradient> -</defs> -<style type="text/css"> - text { font-family:$fonttype; font-size:${fontsize}px; fill:$black; } - #search, #ignorecase { opacity:0.1; cursor:pointer; } - #search:hover, #search.show, #ignorecase:hover, #ignorecase.show { opacity:1; } - #subtitle { text-anchor:middle; font-color:$vdgrey; } - #title { text-anchor:middle; font-size:${titlesize}px} - #unzoom { cursor:pointer; } - #frames > *:hover { stroke:black; stroke-width:0.5; cursor:pointer; } - .hide { display:none; } - .parent { opacity:0.5; } -</style> -<script type="text/ecmascript"> -<![CDATA[ - "use strict"; - var details, searchbtn, unzoombtn, matchedtxt, svg, searching, currentSearchTerm, ignorecase, ignorecaseBtn; - function init(evt) { - details = document.getElementById("details").firstChild; - searchbtn = document.getElementById("search"); - ignorecaseBtn = document.getElementById("ignorecase"); - unzoombtn = document.getElementById("unzoom"); - matchedtxt = document.getElementById("matched"); - svg = document.getElementsByTagName("svg")[0]; - searching = 0; - currentSearchTerm = null; - } - - window.addEventListener("click", function(e) { - var target = find_group(e.target); - if (target) { - if (target.nodeName == "a") { - if (e.ctrlKey === false) return; - e.preventDefault(); - } - if 
(target.classList.contains("parent")) unzoom(); - zoom(target); - } - else if (e.target.id == "unzoom") unzoom(); - else if (e.target.id == "search") search_prompt(); - else if (e.target.id == "ignorecase") toggle_ignorecase(); - }, false) - - // mouse-over for info - // show - window.addEventListener("mouseover", function(e) { - var target = find_group(e.target); - if (target) details.nodeValue = "$nametype " + g_to_text(target); - }, false) - - // clear - window.addEventListener("mouseout", function(e) { - var target = find_group(e.target); - if (target) details.nodeValue = ' '; - }, false) - - // ctrl-F for search - window.addEventListener("keydown",function (e) { - if (e.keyCode === 114 || (e.ctrlKey && e.keyCode === 70)) { - e.preventDefault(); - search_prompt(); - } - }, false) - - // ctrl-I to toggle case-sensitive search - window.addEventListener("keydown",function (e) { - if (e.ctrlKey && e.keyCode === 73) { - e.preventDefault(); - toggle_ignorecase(); - } - }, false) - - // functions - function find_child(node, selector) { - var children = node.querySelectorAll(selector); - if (children.length) return children[0]; - return; - } - function find_group(node) { - var parent = node.parentElement; - if (!parent) return; - if (parent.id == "frames") return node; - return find_group(parent); - } - function orig_save(e, attr, val) { - if (e.attributes["_orig_" + attr] != undefined) return; - if (e.attributes[attr] == undefined) return; - if (val == undefined) val = e.attributes[attr].value; - e.setAttribute("_orig_" + attr, val); - } - function orig_load(e, attr) { - if (e.attributes["_orig_"+attr] == undefined) return; - e.attributes[attr].value = e.attributes["_orig_" + attr].value; - e.removeAttribute("_orig_"+attr); - } - function g_to_text(e) { - var text = find_child(e, "title").firstChild.nodeValue; - return (text) - } - function g_to_func(e) { - var func = g_to_text(e); - // if there's any manipulation we want to do to the function - // name before it's searched, do it here before returning. 
- return (func); - } - function update_text(e) { - var r = find_child(e, "rect"); - var t = find_child(e, "text"); - var w = parseFloat(r.attributes.width.value) -3; - var txt = find_child(e, "title").textContent.replace(/\\([^(]*\\)\$/,""); - t.attributes.x.value = parseFloat(r.attributes.x.value) + 3; - - // Smaller than this size won't fit anything - if (w < 2 * $fontsize * $fontwidth) { - t.textContent = ""; - return; - } - - t.textContent = txt; - // Fit in full text width - if (/^ *\$/.test(txt) || t.getSubStringLength(0, txt.length) < w) - return; - - for (var x = txt.length - 2; x > 0; x--) { - if (t.getSubStringLength(0, x + 2) <= w) { - t.textContent = txt.substring(0, x) + ".."; - return; - } - } - t.textContent = ""; - } - - // zoom - function zoom_reset(e) { - if (e.attributes != undefined) { - orig_load(e, "x"); - orig_load(e, "width"); - } - if (e.childNodes == undefined) return; - for (var i = 0, c = e.childNodes; i < c.length; i++) { - zoom_reset(c[i]); - } - } - function zoom_child(e, x, ratio) { - if (e.attributes != undefined) { - if (e.attributes.x != undefined) { - orig_save(e, "x"); - e.attributes.x.value = (parseFloat(e.attributes.x.value) - x - $xpad) * ratio + $xpad; - if (e.tagName == "text") - e.attributes.x.value = find_child(e.parentNode, "rect[x]").attributes.x.value + 3; - } - if (e.attributes.width != undefined) { - orig_save(e, "width"); - e.attributes.width.value = parseFloat(e.attributes.width.value) * ratio; - } - } - - if (e.childNodes == undefined) return; - for (var i = 0, c = e.childNodes; i < c.length; i++) { - zoom_child(c[i], x - $xpad, ratio); - } - } - function zoom_parent(e) { - if (e.attributes) { - if (e.attributes.x != undefined) { - orig_save(e, "x"); - e.attributes.x.value = $xpad; - } - if (e.attributes.width != undefined) { - orig_save(e, "width"); - e.attributes.width.value = parseInt(svg.width.baseVal.value) - ($xpad * 2); - } - } - if (e.childNodes == undefined) return; - for (var i = 0, c = e.childNodes; i < c.length; i++) { - zoom_parent(c[i]); - } - } - function zoom(node) { - var attr = find_child(node, "rect").attributes; - var width = parseFloat(attr.width.value); - var xmin = parseFloat(attr.x.value); - var xmax = parseFloat(xmin + width); - var ymin = parseFloat(attr.y.value); - var ratio = (svg.width.baseVal.value - 2 * $xpad) / width; - - // XXX: Workaround for JavaScript float issues (fix me) - var fudge = 0.0001; - - unzoombtn.classList.remove("hide"); - - var el = document.getElementById("frames").children; - for (var i = 0; i < el.length; i++) { - var e = el[i]; - var a = find_child(e, "rect").attributes; - var ex = parseFloat(a.x.value); - var ew = parseFloat(a.width.value); - var upstack; - // Is it an ancestor - if ($inverted == 0) { - upstack = parseFloat(a.y.value) > ymin; - } else { - upstack = parseFloat(a.y.value) < ymin; - } - if (upstack) { - // Direct ancestor - if (ex <= xmin && (ex+ew+fudge) >= xmax) { - e.classList.add("parent"); - zoom_parent(e); - update_text(e); - } - // not in current path - else - e.classList.add("hide"); - } - // Children maybe - else { - // no common path - if (ex < xmin || ex + fudge >= xmax) { - e.classList.add("hide"); - } - else { - zoom_child(e, xmin, ratio); - update_text(e); - } - } - } - search(); - } - function unzoom() { - unzoombtn.classList.add("hide"); - var el = document.getElementById("frames").children; - for(var i = 0; i < el.length; i++) { - el[i].classList.remove("parent"); - el[i].classList.remove("hide"); - zoom_reset(el[i]); - update_text(el[i]); - } - 
search(); - } - - // search - function toggle_ignorecase() { - ignorecase = !ignorecase; - if (ignorecase) { - ignorecaseBtn.classList.add("show"); - } else { - ignorecaseBtn.classList.remove("show"); - } - reset_search(); - search(); - } - function reset_search() { - var el = document.querySelectorAll("#frames rect"); - for (var i = 0; i < el.length; i++) { - orig_load(el[i], "fill") - } - } - function search_prompt() { - if (!searching) { - var term = prompt("Enter a search term (regexp " + - "allowed, eg: ^ext4_)" - + (ignorecase ? ", ignoring case" : "") - + "\\nPress Ctrl-i to toggle case sensitivity", ""); - if (term != null) { - currentSearchTerm = term; - search(); - } - } else { - reset_search(); - searching = 0; - currentSearchTerm = null; - searchbtn.classList.remove("show"); - searchbtn.firstChild.nodeValue = "Search" - matchedtxt.classList.add("hide"); - matchedtxt.firstChild.nodeValue = "" - } - } - function search(term) { - if (currentSearchTerm === null) return; - var term = currentSearchTerm; - - var re = new RegExp(term, ignorecase ? 'i' : ''); - var el = document.getElementById("frames").children; - var matches = new Object(); - var maxwidth = 0; - for (var i = 0; i < el.length; i++) { - var e = el[i]; - var func = g_to_func(e); - var rect = find_child(e, "rect"); - if (func == null || rect == null) - continue; - - // Save max width. Only works as we have a root frame - var w = parseFloat(rect.attributes.width.value); - if (w > maxwidth) - maxwidth = w; - - if (func.match(re)) { - // highlight - var x = parseFloat(rect.attributes.x.value); - orig_save(rect, "fill"); - rect.attributes.fill.value = "$searchcolor"; - - // remember matches - if (matches[x] == undefined) { - matches[x] = w; - } else { - if (w > matches[x]) { - // overwrite with parent - matches[x] = w; - } - } - searching = 1; - } - } - if (!searching) - return; - - searchbtn.classList.add("show"); - searchbtn.firstChild.nodeValue = "Reset Search"; - - // calculate percent matched, excluding vertical overlap - var count = 0; - var lastx = -1; - var lastw = 0; - var keys = Array(); - for (k in matches) { - if (matches.hasOwnProperty(k)) - keys.push(k); - } - // sort the matched frames by their x location - // ascending, then width descending - keys.sort(function(a, b){ - return a - b; - }); - // Step through frames saving only the biggest bottom-up frames - // thanks to the sort order. This relies on the tree property - // where children are always smaller than their parents. 
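A minimal Python sketch of that matched-percent step, assuming the same inputs the script builds: matches maps the x position of each highlighted frame to the widest matched frame starting there, and maxwidth is the root frame's width. Because children always start at or after their parent and are narrower, skipping any frame that begins inside the previously counted one is enough to avoid double counting:

    def matched_percent(matches: dict[float, float], maxwidth: float) -> float:
        fudge = 0.0001                        # tolerate floating-point jitter, as in the JS
        count, last_x, last_w = 0.0, -1.0, 0.0
        for x in sorted(matches):             # left to right
            w = matches[x]
            if x >= last_x + last_w - fudge:  # not nested inside the last counted frame
                count += w
                last_x, last_w = x, w
        return 100.0 * count / maxwidth

For example, with matches {0.0: 100.0, 10.0: 30.0, 200.0: 50.0} and maxwidth 1000.0, the nested frame at x=10 is skipped and the result is 15.0%.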
- var fudge = 0.0001; // JavaScript floating point - for (var k in keys) { - var x = parseFloat(keys[k]); - var w = matches[keys[k]]; - if (x >= lastx + lastw - fudge) { - count += w; - lastx = x; - lastw = w; - } - } - // display matched percent - matchedtxt.classList.remove("hide"); - var pct = 100 * count / maxwidth; - if (pct != 100) pct = pct.toFixed(1) - matchedtxt.firstChild.nodeValue = "Matched: " + pct + "%"; - } -]]> -</script> -INC -$im->include($inc); -$im->filledRectangle(0, 0, $imagewidth, $imageheight, 'url(#background)'); -$im->stringTTF("title", int($imagewidth / 2), $fontsize * 2, $titletext); -$im->stringTTF("subtitle", int($imagewidth / 2), $fontsize * 4, $subtitletext) if $subtitletext ne ""; -$im->stringTTF("details", $xpad, $imageheight - ($ypad2 / 2), " "); -$im->stringTTF("unzoom", $xpad, $fontsize * 2, "Reset Zoom", 'class="hide"'); -$im->stringTTF("search", $imagewidth - $xpad - 100, $fontsize * 2, "Search"); -$im->stringTTF("ignorecase", $imagewidth - $xpad - 16, $fontsize * 2, "ic"); -$im->stringTTF("matched", $imagewidth - $xpad - 100, $imageheight - ($ypad2 / 2), " "); - -if ($palette) { - read_palette(); -} - -# draw frames -$im->group_start({id => "frames"}); -while (my ($id, $node) = each %Node) { - my ($func, $depth, $etime) = split ";", $id; - my $stime = $node->{stime}; - my $delta = $node->{delta}; - - $etime = $timemax if $func eq "" and $depth == 0; - - my $x1 = $xpad + $stime * $widthpertime; - my $x2 = $xpad + $etime * $widthpertime; - my ($y1, $y2); - unless ($inverted) { - $y1 = $imageheight - $ypad2 - ($depth + 1) * $frameheight + $framepad; - $y2 = $imageheight - $ypad2 - $depth * $frameheight; - } else { - $y1 = $ypad1 + $depth * $frameheight; - $y2 = $ypad1 + ($depth + 1) * $frameheight - $framepad; - } - - my $samples = sprintf "%.0f", ($etime - $stime) * $factor; - (my $samples_txt = $samples) # add commas per perlfaq5 - =~ s/(^[-+]?\d+?(?=(?>(?:\d{3})+)(?!\d))|\G\d{3}(?=\d))/$1,/g; - - my $info; - if ($func eq "" and $depth == 0) { - $info = "all ($samples_txt $countname, 100%)"; - } else { - my $pct = sprintf "%.2f", ((100 * $samples) / ($timemax * $factor)); - my $escaped_func = $func; - # clean up SVG breaking characters: - $escaped_func =~ s/&/&/g; - $escaped_func =~ s/</</g; - $escaped_func =~ s/>/>/g; - $escaped_func =~ s/"/"/g; - $escaped_func =~ s/_\[[kwij]\]$//; # strip any annotation - unless (defined $delta) { - $info = "$escaped_func ($samples_txt $countname, $pct%)"; - } else { - my $d = $negate ? -$delta : $delta; - my $deltapct = sprintf "%.2f", ((100 * $d) / ($timemax * $factor)); - $deltapct = $d > 0 ? "+$deltapct" : $deltapct; - $info = "$escaped_func ($samples_txt $countname, $pct%; $deltapct%)"; - } - } - - my $nameattr = { %{ $nameattr{$func}||{} } }; # shallow clone - $nameattr->{title} ||= $info; - $im->group_start($nameattr); - - my $color; - if ($func eq "--") { - $color = $vdgrey; - } elsif ($func eq "-") { - $color = $dgrey; - } elsif (defined $delta) { - $color = color_scale($delta, $maxdelta); - } elsif ($palette) { - $color = color_map($colors, $func); - } else { - $color = color($colors, $hash, $func); - } - $im->filledRectangle($x1, $y1, $x2, $y2, $color, 'rx="2" ry="2"'); - - my $chars = int( ($x2 - $x1) / ($fontsize * $fontwidth)); - my $text = ""; - if ($chars >= 3) { #Β room for one char plus two dots - $func =~ s/_\[[kwij]\]$//; # strip any annotation - $text = substr $func, 0, $chars; - substr($text, -2, 2) = ".." 
if $chars < length $func; - $text =~ s/&/&/g; - $text =~ s/</</g; - $text =~ s/>/>/g; - } - $im->stringTTF(undef, $x1 + 3, 3 + ($y1 + $y2) / 2, $text); - - $im->group_end($nameattr); -} -$im->group_end(); - -print $im->svg; - -if ($palette) { - write_palette(); -} - -# vim: ts=8 sts=8 sw=8 noexpandtab diff --git a/ee/clickhouse/materialized_columns/__init__.py b/ee/clickhouse/materialized_columns/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/clickhouse/materialized_columns/analyze.py b/ee/clickhouse/materialized_columns/analyze.py deleted file mode 100644 index fd1d1d09cb..0000000000 --- a/ee/clickhouse/materialized_columns/analyze.py +++ /dev/null @@ -1,213 +0,0 @@ -from collections import defaultdict -import re -from datetime import timedelta -from typing import Optional -from collections.abc import Generator - -import structlog - -from ee.clickhouse.materialized_columns.columns import ( - DEFAULT_TABLE_COLUMN, - MaterializedColumn, - backfill_materialized_columns, - get_materialized_columns, - materialize, -) -from ee.settings import ( - MATERIALIZE_COLUMNS_ANALYSIS_PERIOD_HOURS, - MATERIALIZE_COLUMNS_BACKFILL_PERIOD_DAYS, - MATERIALIZE_COLUMNS_MAX_AT_ONCE, - MATERIALIZE_COLUMNS_MINIMUM_QUERY_TIME, -) -from posthog.cache_utils import instance_memoize -from posthog.client import sync_execute -from posthog.models.filters.mixins.utils import cached_property -from posthog.models.person.sql import ( - GET_EVENT_PROPERTIES_COUNT, - GET_PERSON_PROPERTIES_COUNT, -) -from posthog.models.property import PropertyName, TableColumn, TableWithProperties -from posthog.models.property_definition import PropertyDefinition -from posthog.models.team import Team -from posthog.settings import CLICKHOUSE_CLUSTER - -Suggestion = tuple[TableWithProperties, TableColumn, PropertyName] - -logger = structlog.get_logger(__name__) - - -class TeamManager: - @instance_memoize - def person_properties(self, team_id: str) -> set[str]: - return self._get_properties(GET_PERSON_PROPERTIES_COUNT, team_id) - - @instance_memoize - def event_properties(self, team_id: str) -> set[str]: - return set( - PropertyDefinition.objects.filter(team_id=team_id, type=PropertyDefinition.Type.EVENT).values_list( - "name", flat=True - ) - ) - - @instance_memoize - def person_on_events_properties(self, team_id: str) -> set[str]: - return self._get_properties(GET_EVENT_PROPERTIES_COUNT.format(column_name="person_properties"), team_id) - - def _get_properties(self, query, team_id) -> set[str]: - rows = sync_execute(query, {"team_id": team_id}) - return {name for name, _ in rows} - - -class Query: - def __init__( - self, - query_string: str, - query_time_ms: float, - min_query_time=MATERIALIZE_COLUMNS_MINIMUM_QUERY_TIME, - ): - self.query_string = query_string - self.query_time_ms = query_time_ms - self.min_query_time = min_query_time - - @property - def cost(self) -> int: - return int((self.query_time_ms - self.min_query_time) / 1000) + 1 - - @cached_property - def is_valid(self): - return self.team_id is not None and Team.objects.filter(pk=self.team_id).exists() - - @cached_property - def team_id(self) -> Optional[str]: - matches = re.findall(r"team_id = (\d+)", self.query_string) - return matches[0] if matches else None - - @cached_property - def _all_properties(self) -> list[tuple[str, PropertyName]]: - return re.findall(r"JSONExtract\w+\((\S+), '([^']+)'\)", self.query_string) - - def properties( - self, team_manager: TeamManager - ) -> Generator[tuple[TableWithProperties, TableColumn, PropertyName], None, 
None]: - # Reverse-engineer whether a property is an "event" or "person" property by getting their event definitions. - # :KLUDGE: Note that the same property will be found on both tables if both are used. - # We try to hone in on the right column by looking at the column from which the property is extracted. - person_props = team_manager.person_properties(self.team_id) - event_props = team_manager.event_properties(self.team_id) - person_on_events_props = team_manager.person_on_events_properties(self.team_id) - - for table_column, property in self._all_properties: - if property in event_props: - yield "events", DEFAULT_TABLE_COLUMN, property - if property in person_props: - yield "person", DEFAULT_TABLE_COLUMN, property - - if property in person_on_events_props and "person_properties" in table_column: - yield "events", "person_properties", property - - -def _analyze(since_hours_ago: int, min_query_time: int, team_id: Optional[int] = None) -> list[Suggestion]: - "Finds columns that should be materialized" - - raw_queries = sync_execute( - """ -WITH - {min_query_time} as slow_query_minimum, - ( - 159, -- TIMEOUT EXCEEDED - 160, -- TOO SLOW (estimated query execution time) - ) as exception_codes, - 20 * 1000 * 1000 * 1000 as min_bytes_read, - 5000000 as min_read_rows -SELECT - arrayJoin( - extractAll(query, 'JSONExtract[a-zA-Z0-9]*?\\((?:[a-zA-Z0-9\\`_-]+\\.)?(.*?), .*?\\)') - ) as column, - arrayJoin(extractAll(query, 'JSONExtract[a-zA-Z0-9]*?\\(.*?, \\'([a-zA-Z0-9_\\-\\.\\$\\/\\ ]*?)\\'\\)')) as prop_to_materialize - --,groupUniqArrayIf(JSONExtractInt(log_comment, 'team_id'), type > 2), - --count(), - --countIf(type > 2) as failures, - --countIf(query_duration_ms > 3000) as slow_query, - --formatReadableSize(avg(read_bytes)), - --formatReadableSize(max(read_bytes)) -FROM - clusterAllReplicas({cluster}, system, query_log) -WHERE - query_start_time > now() - toIntervalHour({since}) - and query LIKE '%JSONExtract%' - and query not LIKE '%JSONExtractKeysAndValuesRaw(group_properties)%' - and type > 1 - and is_initial_query - and JSONExtractString(log_comment, 'access_method') != 'personal_api_key' -- API requests failing is less painful than queries in the interface - and JSONExtractString(log_comment, 'kind') != 'celery' - and JSONExtractInt(log_comment, 'team_id') != 0 - and query not like '%person_distinct_id2%' -- Old style person properties that are joined, no need to optimize those queries - and column IN ('properties', 'person_properties', 'group0_properties', 'group1_properties', 'group2_properties', 'group3_properties', 'group4_properties') - and read_bytes > min_bytes_read - and (exception_code IN exception_codes OR query_duration_ms > slow_query_minimum) - and read_rows > min_read_rows - {team_id_filter} -GROUP BY - 1, 2 -HAVING - countIf(exception_code IN exception_codes) > 0 OR countIf(query_duration_ms > slow_query_minimum) > 9 -ORDER BY - countIf(exception_code IN exception_codes) DESC, - countIf(query_duration_ms > slow_query_minimum) DESC -LIMIT 100 -- Make sure we don't add 100s of columns in one run - """.format( - since=since_hours_ago, - min_query_time=min_query_time, - team_id_filter=f"and JSONExtractInt(log_comment, 'team_id') = {team_id}" if team_id else "", - cluster=CLICKHOUSE_CLUSTER, - ), - ) - - return [("events", table_column, property_name) for (table_column, property_name) in raw_queries] - - -def materialize_properties_task( - properties_to_materialize: Optional[list[Suggestion]] = None, - time_to_analyze_hours: int = MATERIALIZE_COLUMNS_ANALYSIS_PERIOD_HOURS, - 
maximum: int = MATERIALIZE_COLUMNS_MAX_AT_ONCE, - min_query_time: int = MATERIALIZE_COLUMNS_MINIMUM_QUERY_TIME, - backfill_period_days: int = MATERIALIZE_COLUMNS_BACKFILL_PERIOD_DAYS, - dry_run: bool = False, - team_id_to_analyze: Optional[int] = None, - is_nullable: bool = False, -) -> None: - """ - Creates materialized columns for event and person properties based off of slow queries - """ - - if properties_to_materialize is None: - properties_to_materialize = _analyze(time_to_analyze_hours, min_query_time, team_id_to_analyze) - - properties_by_table: dict[TableWithProperties, list[tuple[TableColumn, PropertyName]]] = defaultdict(list) - for table, table_column, property_name in properties_to_materialize: - properties_by_table[table].append((table_column, property_name)) - - result: list[Suggestion] = [] - for table, properties in properties_by_table.items(): - existing_materialized_properties = get_materialized_columns(table).keys() - for table_column, property_name in properties: - if (property_name, table_column) not in existing_materialized_properties: - result.append((table, table_column, property_name)) - - if len(result) > 0: - logger.info(f"Calculated columns that could be materialized. count={len(result)}") - else: - logger.info("Found no columns to materialize.") - - materialized_columns: dict[TableWithProperties, list[MaterializedColumn]] = defaultdict(list) - for table, table_column, property_name in result[:maximum]: - logger.info(f"Materializing column. table={table}, property_name={property_name}") - if not dry_run: - materialized_columns[table].append( - materialize(table, property_name, table_column=table_column, is_nullable=is_nullable) - ) - - if backfill_period_days > 0 and not dry_run: - logger.info(f"Starting backfill for new materialized columns. 
period_days={backfill_period_days}") - for table, columns in materialized_columns.items(): - backfill_materialized_columns(table, columns, timedelta(days=backfill_period_days)) diff --git a/ee/clickhouse/materialized_columns/columns.py b/ee/clickhouse/materialized_columns/columns.py deleted file mode 100644 index ab051fee55..0000000000 --- a/ee/clickhouse/materialized_columns/columns.py +++ /dev/null @@ -1,489 +0,0 @@ -from __future__ import annotations - -import logging -import re -from collections.abc import Callable, Iterable, Iterator -from copy import copy -from dataclasses import dataclass, replace -from datetime import timedelta -from typing import Any, Literal, TypeVar, cast - -from clickhouse_driver import Client -from django.utils.timezone import now - -from posthog.cache_utils import cache_for -from posthog.clickhouse.client.connection import default_client -from posthog.clickhouse.cluster import ClickhouseCluster, ConnectionInfo, FuturesMap, HostInfo -from posthog.clickhouse.kafka_engine import trim_quotes_expr -from posthog.clickhouse.materialized_columns import ColumnName, TablesWithMaterializedColumns -from posthog.client import sync_execute -from posthog.models.event.sql import EVENTS_DATA_TABLE -from posthog.models.person.sql import PERSONS_TABLE -from posthog.models.property import PropertyName, TableColumn, TableWithProperties -from posthog.models.utils import generate_random_short_suffix -from posthog.settings import CLICKHOUSE_DATABASE, CLICKHOUSE_PER_TEAM_SETTINGS, TEST - - -logger = logging.getLogger(__name__) - -T = TypeVar("T") - -DEFAULT_TABLE_COLUMN: Literal["properties"] = "properties" - -SHORT_TABLE_COLUMN_NAME = { - "properties": "p", - "group_properties": "gp", - "person_properties": "pp", - "group0_properties": "gp0", - "group1_properties": "gp1", - "group2_properties": "gp2", - "group3_properties": "gp3", - "group4_properties": "gp4", -} - - -@dataclass -class MaterializedColumn: - name: ColumnName - details: MaterializedColumnDetails - is_nullable: bool - - @property - def type(self) -> str: - if self.is_nullable: - return "Nullable(String)" - else: - return "String" - - def get_expression_and_parameters(self) -> tuple[str, dict[str, Any]]: - if self.is_nullable: - return ( - f"JSONExtract({self.details.table_column}, %(property_name)s, %(property_type)s)", - {"property_name": self.details.property_name, "property_type": self.type}, - ) - else: - return ( - trim_quotes_expr(f"JSONExtractRaw({self.details.table_column}, %(property)s)"), - {"property": self.details.property_name}, - ) - - @staticmethod - def get_all(table: TablesWithMaterializedColumns) -> Iterator[MaterializedColumn]: - rows = sync_execute( - """ - SELECT name, comment, type like 'Nullable(%%)' as is_nullable - FROM system.columns - WHERE database = %(database)s - AND table = %(table)s - AND comment LIKE '%%column_materializer::%%' - AND comment not LIKE '%%column_materializer::elements_chain::%%' - """, - {"database": CLICKHOUSE_DATABASE, "table": table}, - ) - - for name, comment, is_nullable in rows: - yield MaterializedColumn(name, MaterializedColumnDetails.from_column_comment(comment), is_nullable) - - @staticmethod - def get(table: TablesWithMaterializedColumns, column_name: ColumnName) -> MaterializedColumn: - # TODO: It would be more efficient to push the filter here down into the `get_all` query, but that would require - # more a sophisticated method of constructing queries than we have right now, and this data set should be small - # enough that this doesn't really matter (at 
least as of writing.) - columns = [column for column in MaterializedColumn.get_all(table) if column.name == column_name] - match columns: - case []: - raise ValueError("column does not exist") - case [column]: - return column - case _: - # this should never happen (column names are unique within a table) and suggests an error in the query - raise ValueError(f"got {len(columns)} columns, expected 0 or 1") - - -@dataclass(frozen=True) -class MaterializedColumnDetails: - table_column: TableColumn - property_name: PropertyName - is_disabled: bool - - COMMENT_PREFIX = "column_materializer" - COMMENT_SEPARATOR = "::" - COMMENT_DISABLED_MARKER = "disabled" - - def as_column_comment(self) -> str: - bits = [self.COMMENT_PREFIX, self.table_column, self.property_name] - if self.is_disabled: - bits.append(self.COMMENT_DISABLED_MARKER) - return self.COMMENT_SEPARATOR.join(bits) - - @classmethod - def from_column_comment(cls, comment: str) -> MaterializedColumnDetails: - match comment.split(cls.COMMENT_SEPARATOR, 3): - # Old style comments have the format "column_materializer::property", dealing with the default table column. - case [cls.COMMENT_PREFIX, property_name]: - return MaterializedColumnDetails(DEFAULT_TABLE_COLUMN, property_name, is_disabled=False) - # Otherwise, it's "column_materializer::table_column::property" for columns that are active. - case [cls.COMMENT_PREFIX, table_column, property_name]: - return MaterializedColumnDetails(cast(TableColumn, table_column), property_name, is_disabled=False) - # Columns that are marked as disabled have an extra trailer indicating their status. - case [cls.COMMENT_PREFIX, table_column, property_name, cls.COMMENT_DISABLED_MARKER]: - return MaterializedColumnDetails(cast(TableColumn, table_column), property_name, is_disabled=True) - case _: - raise ValueError(f"unexpected comment format: {comment!r}") - - -def get_materialized_columns( - table: TablesWithMaterializedColumns, -) -> dict[tuple[PropertyName, TableColumn], MaterializedColumn]: - return { - (column.details.property_name, column.details.table_column): column - for column in MaterializedColumn.get_all(table) - } - - -@cache_for(timedelta(minutes=15)) -def get_enabled_materialized_columns( - table: TablesWithMaterializedColumns, -) -> dict[tuple[PropertyName, TableColumn], MaterializedColumn]: - return {k: column for k, column in get_materialized_columns(table).items() if not column.details.is_disabled} - - -def get_cluster() -> ClickhouseCluster: - extra_hosts = [] - for host_config in map(copy, CLICKHOUSE_PER_TEAM_SETTINGS.values()): - extra_hosts.append(ConnectionInfo(host_config.pop("host"))) - assert len(host_config) == 0, f"unexpected values: {host_config!r}" - return ClickhouseCluster(default_client(), extra_hosts=extra_hosts) - - -@dataclass -class TableInfo: - data_table: str - - @property - def read_table(self) -> str: - return self.data_table - - def map_data_nodes(self, cluster: ClickhouseCluster, fn: Callable[[Client], T]) -> FuturesMap[HostInfo, T]: - return cluster.map_all_hosts(fn) - - -@dataclass -class ShardedTableInfo(TableInfo): - dist_table: str - - @property - def read_table(self) -> str: - return self.dist_table - - def map_data_nodes(self, cluster: ClickhouseCluster, fn: Callable[[Client], T]) -> FuturesMap[HostInfo, T]: - return cluster.map_one_host_per_shard(fn) - - -tables: dict[str, TableInfo | ShardedTableInfo] = { - PERSONS_TABLE: TableInfo(PERSONS_TABLE), - "events": ShardedTableInfo(EVENTS_DATA_TABLE(), "events"), -} - - -def get_minmax_index_name(column: str) -> 
str: - return f"minmax_{column}" - - -@dataclass -class CreateColumnOnDataNodesTask: - table: str - column: MaterializedColumn - create_minmax_index: bool - add_column_comment: bool - - def execute(self, client: Client) -> None: - expression, parameters = self.column.get_expression_and_parameters() - actions = [ - f"ADD COLUMN IF NOT EXISTS {self.column.name} {self.column.type} MATERIALIZED {expression}", - ] - - if self.add_column_comment: - actions.append(f"COMMENT COLUMN {self.column.name} %(comment)s") - parameters["comment"] = self.column.details.as_column_comment() - - if self.create_minmax_index: - index_name = get_minmax_index_name(self.column.name) - actions.append(f"ADD INDEX IF NOT EXISTS {index_name} {self.column.name} TYPE minmax GRANULARITY 1") - - client.execute( - f"ALTER TABLE {self.table} " + ", ".join(actions), - parameters, - settings={"alter_sync": 2 if TEST else 1}, - ) - - -@dataclass -class CreateColumnOnQueryNodesTask: - table: str - column: MaterializedColumn - - def execute(self, client: Client) -> None: - client.execute( - f""" - ALTER TABLE {self.table} - ADD COLUMN IF NOT EXISTS {self.column.name} {self.column.type}, - COMMENT COLUMN {self.column.name} %(comment)s - """, - {"comment": self.column.details.as_column_comment()}, - settings={"alter_sync": 2 if TEST else 1}, - ) - - -def materialize( - table: TableWithProperties, - property: PropertyName, - column_name: ColumnName | None = None, - table_column: TableColumn = DEFAULT_TABLE_COLUMN, - create_minmax_index=not TEST, - is_nullable: bool = False, -) -> MaterializedColumn: - if existing_column := get_materialized_columns(table).get((property, table_column)): - if TEST: - return existing_column - - raise ValueError(f"Property already materialized. table={table}, property={property}, column={table_column}") - - if table_column not in SHORT_TABLE_COLUMN_NAME: - raise ValueError(f"Invalid table_column={table_column} for materialisation") - - cluster = get_cluster() - table_info = tables[table] - - column = MaterializedColumn( - name=column_name or _materialized_column_name(table, property, table_column), - details=MaterializedColumnDetails( - table_column=table_column, - property_name=property, - is_disabled=False, - ), - is_nullable=is_nullable, - ) - - table_info.map_data_nodes( - cluster, - CreateColumnOnDataNodesTask( - table_info.data_table, - column, - create_minmax_index, - add_column_comment=table_info.read_table == table_info.data_table, - ).execute, - ).result() - - if isinstance(table_info, ShardedTableInfo): - cluster.map_all_hosts( - CreateColumnOnQueryNodesTask( - table_info.dist_table, - column, - ).execute - ).result() - - return column - - -@dataclass -class UpdateColumnCommentTask: - table: str - columns: list[MaterializedColumn] - - def execute(self, client: Client) -> None: - actions = [] - parameters = {} - for i, column in enumerate(self.columns): - parameter_name = f"comment_{i}" - actions.append(f"COMMENT COLUMN {column.name} %({parameter_name})s") - parameters[parameter_name] = column.details.as_column_comment() - - client.execute( - f"ALTER TABLE {self.table} " + ", ".join(actions), - parameters, - settings={"alter_sync": 2 if TEST else 1}, - ) - - -def update_column_is_disabled( - table: TablesWithMaterializedColumns, column_names: Iterable[str], is_disabled: bool -) -> None: - cluster = get_cluster() - table_info = tables[table] - - columns = [MaterializedColumn.get(table, column_name) for column_name in column_names] - - cluster.map_all_hosts( - UpdateColumnCommentTask( - 
table_info.read_table, - [replace(column, details=replace(column.details, is_disabled=is_disabled)) for column in columns], - ).execute - ).result() - - -def check_index_exists(client: Client, table: str, index: str) -> bool: - [(count,)] = client.execute( - """ - SELECT count() - FROM system.data_skipping_indices - WHERE database = currentDatabase() AND table = %(table)s AND name = %(name)s - """, - {"table": table, "name": index}, - ) - assert 1 >= count >= 0 - return bool(count) - - -def check_column_exists(client: Client, table: str, column: str) -> bool: - [(count,)] = client.execute( - """ - SELECT count() - FROM system.columns - WHERE database = currentDatabase() AND table = %(table)s AND name = %(name)s - """, - {"table": table, "name": column}, - ) - assert 1 >= count >= 0 - return bool(count) - - -@dataclass -class DropColumnTask: - table: str - column_names: list[str] - try_drop_index: bool - - def execute(self, client: Client) -> None: - actions = [] - - for column_name in self.column_names: - if self.try_drop_index: - index_name = get_minmax_index_name(column_name) - drop_index_action = f"DROP INDEX IF EXISTS {index_name}" - if check_index_exists(client, self.table, index_name): - actions.append(drop_index_action) - else: - logger.info("Skipping %r, nothing to do...", drop_index_action) - - drop_column_action = f"DROP COLUMN IF EXISTS {column_name}" - if check_column_exists(client, self.table, column_name): - actions.append(drop_column_action) - else: - logger.info("Skipping %r, nothing to do...", drop_column_action) - - if actions: - client.execute( - f"ALTER TABLE {self.table} " + ", ".join(actions), - settings={"alter_sync": 2 if TEST else 1}, - ) - - -def drop_column(table: TablesWithMaterializedColumns, column_names: Iterable[str]) -> None: - cluster = get_cluster() - table_info = tables[table] - column_names = [*column_names] - - if isinstance(table_info, ShardedTableInfo): - cluster.map_all_hosts( - DropColumnTask( - table_info.dist_table, - column_names, - try_drop_index=False, # no indexes on distributed tables - ).execute - ).result() - - table_info.map_data_nodes( - cluster, - DropColumnTask( - table_info.data_table, - column_names, - try_drop_index=True, - ).execute, - ).result() - - -@dataclass -class BackfillColumnTask: - table: str - columns: list[MaterializedColumn] - backfill_period: timedelta | None - test_settings: dict[str, Any] | None - - def execute(self, client: Client) -> None: - # Hack from https://github.com/ClickHouse/ClickHouse/issues/19785 - # Note that for this to work all inserts should list columns explicitly - # Improve this if https://github.com/ClickHouse/ClickHouse/issues/27730 ever gets resolved - for column in self.columns: - expression, parameters = column.get_expression_and_parameters() - client.execute( - f""" - ALTER TABLE {self.table} - MODIFY COLUMN {column.name} {column.type} DEFAULT {expression} - """, - parameters, - settings=self.test_settings, - ) - - # Kick off mutations which will update clickhouse partitions in the background. 
This will return immediately - assignments = ", ".join(f"{column.name} = {column.name}" for column in self.columns) - - if self.backfill_period is not None: - where_clause = "timestamp > %(cutoff)s" - parameters = {"cutoff": (now() - self.backfill_period).strftime("%Y-%m-%d")} - else: - where_clause = "1 = 1" - parameters = {} - - client.execute( - f"ALTER TABLE {self.table} UPDATE {assignments} WHERE {where_clause}", - parameters, - settings=self.test_settings, - ) - - -def backfill_materialized_columns( - table: TableWithProperties, - columns: Iterable[MaterializedColumn], - backfill_period: timedelta, - test_settings=None, -) -> None: - """ - Backfills the materialized column after its creation. - - This will require reading and writing a lot of data on clickhouse disk. - """ - cluster = get_cluster() - table_info = tables[table] - - table_info.map_data_nodes( - cluster, - BackfillColumnTask( - table_info.data_table, - [*columns], - backfill_period if table == "events" else None, # XXX - test_settings, - ).execute, - ).result() - - -def _materialized_column_name( - table: TableWithProperties, - property: PropertyName, - table_column: TableColumn = DEFAULT_TABLE_COLUMN, -) -> ColumnName: - "Returns a sanitized and unique column name to use for materialized column" - - prefix = "pmat_" if table == "person" else "mat_" - - if table_column != DEFAULT_TABLE_COLUMN: - prefix += f"{SHORT_TABLE_COLUMN_NAME[table_column]}_" - property_str = re.sub("[^0-9a-zA-Z$]", "_", property) - - existing_materialized_column_names = {column.name for column in get_materialized_columns(table).values()} - suffix = "" - - while f"{prefix}{property_str}{suffix}" in existing_materialized_column_names: - suffix = "_" + generate_random_short_suffix() - - return f"{prefix}{property_str}{suffix}" diff --git a/ee/clickhouse/materialized_columns/test/__init__.py b/ee/clickhouse/materialized_columns/test/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/clickhouse/materialized_columns/test/test_analyze.py b/ee/clickhouse/materialized_columns/test/test_analyze.py deleted file mode 100644 index 3b225ab670..0000000000 --- a/ee/clickhouse/materialized_columns/test/test_analyze.py +++ /dev/null @@ -1,57 +0,0 @@ -from posthog.test.base import BaseTest, ClickhouseTestMixin -from posthog.client import sync_execute -from ee.clickhouse.materialized_columns.analyze import materialize_properties_task - -from unittest.mock import patch, call - - -class TestMaterializedColumnsAnalyze(ClickhouseTestMixin, BaseTest): - @patch("ee.clickhouse.materialized_columns.analyze.materialize") - @patch("ee.clickhouse.materialized_columns.analyze.backfill_materialized_columns") - def test_mat_columns(self, patch_backfill, patch_materialize): - sync_execute("SYSTEM FLUSH LOGS") - sync_execute("TRUNCATE TABLE system.query_log") - - queries_to_insert = [ - "SELECT * FROM events WHERE JSONExtractRaw(properties, \\'materialize_me\\')", - "SELECT * FROM events WHERE JSONExtractRaw(properties, \\'materialize_me\\')", - "SELECT * FROM events WHERE JSONExtractRaw(properties, \\'materialize_me2\\')", - "SELECT * FROM events WHERE JSONExtractRaw(`e`.properties, \\'materialize_me3\\')", - "SELECT * FROM events WHERE JSONExtractRaw(person_properties, \\'materialize_person_prop\\')", - "SELECT * FROM groups WHERE JSONExtractRaw(group.group_properties, \\'materialize_person_prop\\')", # this should not appear - "SELECT * FROM groups WHERE JSONExtractRaw(group.group_properties, \\'nested\\', \\'property\\')", # this should not appear 
- ] - - for query in queries_to_insert: - sync_execute( - """ - INSERT INTO system.query_log ( - query, - query_start_time, - type, - is_initial_query, - log_comment, - exception_code, - read_bytes, - read_rows - ) VALUES ( - '{query}', - now(), - 3, - 1, - '{log_comment}', - 159, - 40000000000, - 10000000 - ) - """.format(query=query, log_comment='{"team_id": 2}') - ) - materialize_properties_task() - patch_materialize.assert_has_calls( - [ - call("events", "materialize_me", table_column="properties", is_nullable=False), - call("events", "materialize_me2", table_column="properties", is_nullable=False), - call("events", "materialize_person_prop", table_column="person_properties", is_nullable=False), - call("events", "materialize_me3", table_column="properties", is_nullable=False), - ] - ) diff --git a/ee/clickhouse/materialized_columns/test/test_columns.py b/ee/clickhouse/materialized_columns/test/test_columns.py deleted file mode 100644 index bf09121143..0000000000 --- a/ee/clickhouse/materialized_columns/test/test_columns.py +++ /dev/null @@ -1,419 +0,0 @@ -from datetime import timedelta -from time import sleep -from collections.abc import Iterable -from unittest import TestCase -from unittest.mock import patch - -from freezegun import freeze_time - -from ee.clickhouse.materialized_columns.columns import ( - MaterializedColumn, - MaterializedColumnDetails, - backfill_materialized_columns, - drop_column, - get_enabled_materialized_columns, - get_materialized_columns, - materialize, - update_column_is_disabled, -) -from ee.tasks.materialized_columns import mark_all_materialized -from posthog.clickhouse.materialized_columns import TablesWithMaterializedColumns -from posthog.client import sync_execute -from posthog.conftest import create_clickhouse_tables -from posthog.constants import GROUP_TYPES_LIMIT -from posthog.models.event.sql import EVENTS_DATA_TABLE -from posthog.models.property import PropertyName, TableColumn -from posthog.settings import CLICKHOUSE_DATABASE -from posthog.test.base import BaseTest, ClickhouseTestMixin, _create_event - -EVENTS_TABLE_DEFAULT_MATERIALIZED_COLUMNS = [f"$group_{i}" for i in range(GROUP_TYPES_LIMIT)] + [ - "$session_id", - "$window_id", -] - - -class TestMaterializedColumnDetails(TestCase): - def test_column_comment_formats(self): - old_format_comment = "column_materializer::foo" - old_format_details = MaterializedColumnDetails.from_column_comment(old_format_comment) - assert old_format_details == MaterializedColumnDetails( - "properties", # the default - "foo", - is_disabled=False, - ) - # old comment format is implicitly upgraded to the newer format when serializing - assert old_format_details.as_column_comment() == "column_materializer::properties::foo" - - new_format_comment = "column_materializer::person_properties::bar" - new_format_details = MaterializedColumnDetails.from_column_comment(new_format_comment) - assert new_format_details == MaterializedColumnDetails( - "person_properties", - "bar", - is_disabled=False, - ) - assert new_format_details.as_column_comment() == new_format_comment - - new_format_disabled_comment = "column_materializer::person_properties::bar::disabled" - new_format_disabled_details = MaterializedColumnDetails.from_column_comment(new_format_disabled_comment) - assert new_format_disabled_details == MaterializedColumnDetails( - "person_properties", - "bar", - is_disabled=True, - ) - assert new_format_disabled_details.as_column_comment() == new_format_disabled_comment - - with self.assertRaises(ValueError): - 
MaterializedColumnDetails.from_column_comment("bad-prefix::property") - - with self.assertRaises(ValueError): - MaterializedColumnDetails.from_column_comment("bad-prefix::column::property") - - with self.assertRaises(ValueError): - MaterializedColumnDetails.from_column_comment("column_materializer::column::property::enabled") - - -class TestMaterializedColumns(ClickhouseTestMixin, BaseTest): - def setUp(self): - self.recreate_database() - return super().setUp() - - def tearDown(self): - self.recreate_database() - super().tearDown() - - def recreate_database(self): - sync_execute(f"DROP DATABASE {CLICKHOUSE_DATABASE} SYNC") - sync_execute(f"CREATE DATABASE {CLICKHOUSE_DATABASE}") - create_clickhouse_tables(0) - - def test_get_columns_default(self): - self.assertCountEqual( - [property_name for property_name, _ in get_materialized_columns("events")], - EVENTS_TABLE_DEFAULT_MATERIALIZED_COLUMNS, - ) - self.assertCountEqual(get_materialized_columns("person"), []) - - def test_caching_and_materializing(self): - with freeze_time("2020-01-04T13:01:01Z"): - materialize("events", "$foo", create_minmax_index=True) - materialize("events", "$bar", create_minmax_index=True) - materialize("person", "$zeta", create_minmax_index=True) - - self.assertCountEqual( - [ - property_name - for property_name, _ in get_enabled_materialized_columns("events", use_cache=True).keys() - ], - ["$foo", "$bar", *EVENTS_TABLE_DEFAULT_MATERIALIZED_COLUMNS], - ) - self.assertCountEqual( - get_enabled_materialized_columns("person", use_cache=True).keys(), - [("$zeta", "properties")], - ) - - materialize("events", "abc", create_minmax_index=True) - - self.assertCountEqual( - [ - property_name - for property_name, _ in get_enabled_materialized_columns("events", use_cache=True).keys() - ], - ["$foo", "$bar", *EVENTS_TABLE_DEFAULT_MATERIALIZED_COLUMNS], - ) - - with freeze_time("2020-01-04T14:00:01Z"): - self.assertCountEqual( - [ - property_name - for property_name, _ in get_enabled_materialized_columns("events", use_cache=True).keys() - ], - ["$foo", "$bar", "abc", *EVENTS_TABLE_DEFAULT_MATERIALIZED_COLUMNS], - ) - - @patch("secrets.choice", return_value="X") - def test_materialized_column_naming(self, mock_choice): - assert materialize("events", "$foO();--sqlinject", create_minmax_index=True).name == "mat_$foO_____sqlinject" - - mock_choice.return_value = "Y" - assert ( - materialize("events", "$foO();ÀÀsqlinject", create_minmax_index=True).name == "mat_$foO_____sqlinject_YYYY" - ) - - mock_choice.return_value = "Z" - assert ( - materialize("events", "$foO_____sqlinject", create_minmax_index=True).name == "mat_$foO_____sqlinject_ZZZZ" - ) - - assert materialize("person", "SoMePrOp", create_minmax_index=True).name == "pmat_SoMePrOp" - - def test_backfilling_data(self): - sync_execute("ALTER TABLE events DROP COLUMN IF EXISTS mat_prop") - sync_execute("ALTER TABLE events DROP COLUMN IF EXISTS mat_another") - - _create_event( - event="some_event", - distinct_id="1", - team=self.team, - timestamp="2020-01-01 00:00:00", - properties={"prop": 1}, - ) - _create_event( - event="some_event", - distinct_id="1", - team=self.team, - timestamp="2021-05-02 00:00:00", - properties={"prop": 2, "another": 5}, - ) - _create_event( - event="some_event", - distinct_id="1", - team=self.team, - timestamp="2021-05-03 00:00:00", - properties={"prop": 3}, - ) - _create_event( - event="another_event", - distinct_id="1", - team=self.team, - timestamp="2021-05-04 00:00:00", - ) - _create_event( - event="third_event", - distinct_id="1", - team=self.team, 
- timestamp="2021-05-05 00:00:00", - properties={"prop": 4}, - ) - _create_event( - event="fourth_event", - distinct_id="1", - team=self.team, - timestamp="2021-05-06 00:00:00", - properties={"another": 6}, - ) - - columns = [ - materialize("events", "prop", create_minmax_index=True), - materialize("events", "another", create_minmax_index=True), - ] - - self.assertEqual(self._count_materialized_rows("mat_prop"), 0) - self.assertEqual(self._count_materialized_rows("mat_another"), 0) - - with freeze_time("2021-05-10T14:00:01Z"): - backfill_materialized_columns( - "events", - columns, - timedelta(days=50), - test_settings={"mutations_sync": "0"}, - ) - - _create_event( - event="fifth_event", - distinct_id="1", - team=self.team, - timestamp="2021-05-07 00:00:00", - properties={"another": 7}, - ) - - iterations = 0 - while self._get_count_of_mutations_running() > 0 and iterations < 100: - sleep(0.1) - iterations += 1 - - self.assertGreaterEqual(self._count_materialized_rows("mat_prop"), 4) - self.assertGreaterEqual(self._count_materialized_rows("mat_another"), 4) - - self.assertEqual( - sync_execute("SELECT mat_prop, mat_another FROM events ORDER BY timestamp"), - [ - ("1", ""), - ("2", "5"), - ("3", ""), - ("", ""), - ("4", ""), - ("", "6"), - ("", "7"), - ], - ) - - def test_column_types(self): - columns = [ - materialize("events", "myprop", create_minmax_index=True), - materialize("events", "myprop_nullable", create_minmax_index=True, is_nullable=True), - ] - - expr_nonnullable = "replaceRegexpAll(JSONExtractRaw(properties, 'myprop'), '^\"|\"$', '')" - expr_nullable = "JSONExtract(properties, 'myprop_nullable', 'Nullable(String)')" - self.assertEqual(("String", "MATERIALIZED", expr_nonnullable), self._get_column_types("mat_myprop")) - self.assertEqual( - ("Nullable(String)", "MATERIALIZED", expr_nullable), self._get_column_types("mat_myprop_nullable") - ) - - backfill_materialized_columns("events", columns, timedelta(days=50)) - self.assertEqual(("String", "DEFAULT", expr_nonnullable), self._get_column_types("mat_myprop")) - self.assertEqual(("Nullable(String)", "DEFAULT", expr_nullable), self._get_column_types("mat_myprop_nullable")) - - mark_all_materialized() - self.assertEqual(("String", "MATERIALIZED", expr_nonnullable), self._get_column_types("mat_myprop")) - self.assertEqual( - ("Nullable(String)", "MATERIALIZED", expr_nullable), self._get_column_types("mat_myprop_nullable") - ) - - def _count_materialized_rows(self, column): - return sync_execute( - """ - SELECT sum(rows) - FROM system.parts_columns - WHERE database = %(database)s - AND table = %(table)s - AND column = %(column)s - """, - { - "database": CLICKHOUSE_DATABASE, - "table": EVENTS_DATA_TABLE(), - "column": column, - }, - )[0][0] - - def _get_count_of_mutations_running(self) -> int: - return sync_execute( - """ - SELECT count(*) - FROM system.mutations - WHERE is_done = 0 - """ - )[0][0] - - def _get_column_types(self, column: str): - return sync_execute( - """ - SELECT type, default_kind, default_expression - FROM system.columns - WHERE database = %(database)s AND table = %(table)s AND name = %(column)s - """, - { - "database": CLICKHOUSE_DATABASE, - "table": EVENTS_DATA_TABLE(), - "column": column, - }, - )[0] - - def test_lifecycle(self): - table: TablesWithMaterializedColumns = "events" - property_names = ["foo", "bar"] - source_column: TableColumn = "properties" - - # create materialized columns - materialized_columns = {} - for property_name in property_names: - materialized_columns[property_name] = materialize( - 
table, property_name, table_column=source_column, create_minmax_index=True - ).name - - assert set(property_names) == materialized_columns.keys() - - # ensure they exist everywhere - for property_name, destination_column in materialized_columns.items(): - key = (property_name, source_column) - assert get_materialized_columns(table)[key].name == destination_column - assert MaterializedColumn.get(table, destination_column) == MaterializedColumn( - destination_column, - MaterializedColumnDetails(source_column, property_name, is_disabled=False), - is_nullable=False, - ) - - # disable them and ensure updates apply as needed - update_column_is_disabled(table, materialized_columns.values(), is_disabled=True) - for property_name, destination_column in materialized_columns.items(): - key = (property_name, source_column) - assert get_materialized_columns(table)[key].name == destination_column - assert MaterializedColumn.get(table, destination_column) == MaterializedColumn( - destination_column, - MaterializedColumnDetails(source_column, property_name, is_disabled=True), - is_nullable=False, - ) - - # re-enable them and ensure updates apply as needed - update_column_is_disabled(table, materialized_columns.values(), is_disabled=False) - for property_name, destination_column in materialized_columns.items(): - key = (property_name, source_column) - assert get_materialized_columns(table)[key].name == destination_column - assert MaterializedColumn.get(table, destination_column) == MaterializedColumn( - destination_column, - MaterializedColumnDetails(source_column, property_name, is_disabled=False), - is_nullable=False, - ) - - # drop them and ensure updates apply as needed - drop_column(table, materialized_columns.values()) - for property_name, destination_column in materialized_columns.items(): - key = (property_name, source_column) - assert key not in get_materialized_columns(table) - with self.assertRaises(ValueError): - MaterializedColumn.get(table, destination_column) - - def _get_latest_mutation_id(self, table: str) -> str: - [(mutation_id,)] = sync_execute( - """ - SELECT max(mutation_id) - FROM system.mutations - WHERE - database = currentDatabase() - AND table = %(table)s - """, - {"table": table}, - ) - return mutation_id - - def _get_mutations_since_id(self, table: str, id: str) -> Iterable[str]: - return [ - command - for (command,) in sync_execute( - """ - SELECT command - FROM system.mutations - WHERE - database = currentDatabase() - AND table = %(table)s - AND mutation_id > %(mutation_id)s - ORDER BY mutation_id - """, - {"table": table, "mutation_id": id}, - ) - ] - - def test_drop_optimized_no_index(self): - table: TablesWithMaterializedColumns = ( - "person" # little bit easier than events because no shard awareness needed - ) - property: PropertyName = "myprop" - source_column: TableColumn = "properties" - - destination_column = materialize(table, property, table_column=source_column, create_minmax_index=False) - - latest_mutation_id_before_drop = self._get_latest_mutation_id(table) - - drop_column(table, destination_column.name) - - mutations_ran = self._get_mutations_since_id(table, latest_mutation_id_before_drop) - assert not any("DROP INDEX" in mutation for mutation in mutations_ran) - - def test_drop_optimized_no_column(self): - table: TablesWithMaterializedColumns = ( - "person" # little bit easier than events because no shard awareness needed - ) - property: PropertyName = "myprop" - source_column: TableColumn = "properties" - - # create the materialized column - 
destination_column = materialize(table, property, table_column=source_column, create_minmax_index=False) - - sync_execute(f"ALTER TABLE {table} DROP COLUMN {destination_column.name}", settings={"alter_sync": 1}) - - latest_mutation_id_before_drop = self._get_latest_mutation_id(table) - - drop_column(table, destination_column.name) - - mutations_ran = self._get_mutations_since_id(table, latest_mutation_id_before_drop) - assert not any("DROP COLUMN" in mutation for mutation in mutations_ran) diff --git a/ee/clickhouse/materialized_columns/test/test_query.py b/ee/clickhouse/materialized_columns/test/test_query.py deleted file mode 100644 index 3a55a0614f..0000000000 --- a/ee/clickhouse/materialized_columns/test/test_query.py +++ /dev/null @@ -1,24 +0,0 @@ -from posthog.test.base import APIBaseTest, ClickhouseTestMixin - - -class TestQuery(ClickhouseTestMixin, APIBaseTest): - def test_get_queries_detects(self): - # some random - with self.capture_select_queries() as queries: - self.client.post( - f"/api/projects/{self.team.id}/insights/funnel/", - { - "events": [{"id": "step one", "type": "events", "order": 0}], - "funnel_window_days": 14, - "funnel_order_type": "unordered", - "insight": "funnels", - }, - ).json() - - self.assertTrue(len(queries)) - - # make sure that the queries start with a discoverable prefix. - # If this changes, also update ee/clickhouse/materialized_columns/analyze.py::_get_queries to - # filter on the right queries - for q in queries: - self.assertTrue(q.startswith("/* user_id")) diff --git a/ee/clickhouse/materialized_columns/util.py b/ee/clickhouse/materialized_columns/util.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/clickhouse/models/__init__.py b/ee/clickhouse/models/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/clickhouse/models/group.py b/ee/clickhouse/models/group.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/clickhouse/models/test/__init__.py b/ee/clickhouse/models/test/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/clickhouse/models/test/__snapshots__/test_cohort.ambr b/ee/clickhouse/models/test/__snapshots__/test_cohort.ambr deleted file mode 100644 index 24db8bb3f1..0000000000 --- a/ee/clickhouse/models/test/__snapshots__/test_cohort.ambr +++ /dev/null @@ -1,300 +0,0 @@ -# serializer version: 1 -# name: TestCohort.test_cohortpeople_basic - ''' - /* cohort_calculation: */ - INSERT INTO cohortpeople - SELECT id, - 99999 as cohort_id, - 99999 as team_id, - 1 AS sign, - 0 AS version - FROM - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND ((((has(['something'], replaceRegexpAll(JSONExtractRaw(properties, '$some_prop'), '^"|"$', '')))) - AND ((has(['something'], replaceRegexpAll(JSONExtractRaw(properties, '$another_prop'), '^"|"$', '')))))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND ((((has(['something'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$some_prop'), '^"|"$', '')))) - AND ((has(['something'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$another_prop'), '^"|"$', '')))))) SETTINGS optimize_aggregation_in_order = 1) as person - UNION ALL - SELECT person_id, - cohort_id, - team_id, - -1, - version - FROM cohortpeople - WHERE team_id = 99999 - AND cohort_id = 99999 - AND version < 0 - AND sign = 1 SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: 
TestCohort.test_cohortpeople_with_not_in_cohort_operator - ''' - /* cohort_calculation: */ - INSERT INTO cohortpeople - SELECT id, - 99999 as cohort_id, - 99999 as team_id, - 1 AS sign, - 0 AS version - FROM - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND ((has(['something1'], replaceRegexpAll(JSONExtractRaw(properties, '$some_prop'), '^"|"$', '')))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND ((has(['something1'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$some_prop'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) as person - UNION ALL - SELECT person_id, - cohort_id, - team_id, - -1, - version - FROM cohortpeople - WHERE team_id = 99999 - AND cohort_id = 99999 - AND version < 0 - AND sign = 1 SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: TestCohort.test_cohortpeople_with_not_in_cohort_operator.1 - ''' - /* cohort_calculation: */ - INSERT INTO cohortpeople - SELECT id, - 99999 as cohort_id, - 99999 as team_id, - 1 AS sign, - 0 AS version - FROM - (SELECT person.person_id AS id - FROM - (SELECT pdi.person_id AS person_id, - countIf(timestamp > now() - INTERVAL 2 year - AND timestamp < now() - AND event = '$pageview' - AND 1=1) > 0 AS performed_event_condition_X_level_level_0_level_0_level_0_0 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageview'] - AND timestamp <= now() - AND timestamp >= now() - INTERVAL 2 year - GROUP BY person_id) behavior_query - INNER JOIN - (SELECT *, - id AS person_id - FROM - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (((((NOT has(['something1'], replaceRegexpAll(JSONExtractRaw(properties, '$some_prop'), '^"|"$', ''))))))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (((((NOT has(['something1'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$some_prop'), '^"|"$', ''))))))) SETTINGS optimize_aggregation_in_order = 1)) person ON person.person_id = behavior_query.person_id - WHERE 1 = 1 - AND ((((performed_event_condition_X_level_level_0_level_0_level_0_0)))) SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' ) as person - UNION ALL - SELECT person_id, - cohort_id, - team_id, - -1, - version - FROM cohortpeople - WHERE team_id = 99999 - AND cohort_id = 99999 - AND version < 0 - AND sign = 1 SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: TestCohort.test_cohortpeople_with_not_in_cohort_operator_and_no_precalculation - ''' - SELECT uuid, - distinct_id - FROM events - WHERE team_id = 99999 - AND (distinct_id IN - (SELECT distinct_id - FROM - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) - WHERE person_id IN - (SELECT person.person_id AS id - FROM - (SELECT pdi.person_id AS person_id, - countIf(timestamp > now() - INTERVAL 2 year - AND timestamp < now() - AND event = '$pageview' - AND 1=1) > 0 AS performed_event_condition_X_level_level_0_level_0_level_0_0 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM 
person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageview'] - AND timestamp <= now() - AND timestamp >= now() - INTERVAL 2 year - GROUP BY person_id) behavior_query - INNER JOIN - (SELECT *, - id AS person_id - FROM - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (((((NOT has(['something1'], replaceRegexpAll(JSONExtractRaw(properties, '$some_prop'), '^"|"$', ''))))))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (((((NOT has(['something1'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$some_prop'), '^"|"$', ''))))))) SETTINGS optimize_aggregation_in_order = 1)) person ON person.person_id = behavior_query.person_id - WHERE 1 = 1 - AND ((((performed_event_condition_X_level_level_0_level_0_level_0_0)))) SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' ) )) - ''' -# --- -# name: TestCohort.test_cohortpeople_with_not_in_cohort_operator_for_behavioural_cohorts - ''' - /* cohort_calculation: */ - INSERT INTO cohortpeople - SELECT id, - 99999 as cohort_id, - 99999 as team_id, - 1 AS sign, - 0 AS version - FROM - (SELECT behavior_query.person_id AS id - FROM - (SELECT pdi.person_id AS person_id, - minIf(timestamp, event = 'signup') >= now() - INTERVAL 15 day - AND minIf(timestamp, event = 'signup') < now() as first_time_condition_X_level_level_0_level_0_0 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['signup'] - GROUP BY person_id) behavior_query - WHERE 1 = 1 - AND (((first_time_condition_X_level_level_0_level_0_0))) SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' ) as person - UNION ALL - SELECT person_id, - cohort_id, - team_id, - -1, - version - FROM cohortpeople - WHERE team_id = 99999 - AND cohort_id = 99999 - AND version < 0 - AND sign = 1 SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: TestCohort.test_cohortpeople_with_not_in_cohort_operator_for_behavioural_cohorts.1 - ''' - /* cohort_calculation: */ - INSERT INTO cohortpeople - SELECT id, - 99999 as cohort_id, - 99999 as team_id, - 1 AS sign, - 0 AS version - FROM - (SELECT behavior_query.person_id AS id - FROM - (SELECT pdi.person_id AS person_id, - countIf(timestamp > now() - INTERVAL 2 year - AND timestamp < now() - AND event = '$pageview' - AND 1=1) > 0 AS performed_event_condition_X_level_level_0_level_0_level_0_0, - minIf(timestamp, event = 'signup') >= now() - INTERVAL 15 day - AND minIf(timestamp, event = 'signup') < now() as first_time_condition_X_level_level_0_level_1_level_0_level_0_level_0_0 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageview', 'signup'] - GROUP BY person_id) behavior_query - WHERE 1 = 1 - AND ((((performed_event_condition_X_level_level_0_level_0_level_0_0)) - AND ((((NOT first_time_condition_X_level_level_0_level_1_level_0_level_0_level_0_0)))))) SETTINGS optimize_aggregation_in_order = 
1, - join_algorithm = 'auto' ) as person - UNION ALL - SELECT person_id, - cohort_id, - team_id, - -1, - version - FROM cohortpeople - WHERE team_id = 99999 - AND cohort_id = 99999 - AND version < 0 - AND sign = 1 SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: TestCohort.test_static_cohort_precalculated - ''' - - SELECT distinct_id - FROM - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = %(team_id)s - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) - WHERE person_id IN - (SELECT person_id as id - FROM person_static_cohort - WHERE cohort_id = %(_cohort_id_0)s - AND team_id = %(team_id)s) - ''' -# --- diff --git a/ee/clickhouse/models/test/__snapshots__/test_property.ambr b/ee/clickhouse/models/test/__snapshots__/test_property.ambr deleted file mode 100644 index 131ac57b3f..0000000000 --- a/ee/clickhouse/models/test/__snapshots__/test_property.ambr +++ /dev/null @@ -1,155 +0,0 @@ -# serializer version: 1 -# name: TestPropFormat.test_parse_groups - ''' - SELECT uuid - FROM events - WHERE team_id = 99999 - AND ((has(['val_1'], replaceRegexpAll(JSONExtractRaw(properties, 'attr_1'), '^"|"$', '')) - AND has(['val_2'], replaceRegexpAll(JSONExtractRaw(properties, 'attr_2'), '^"|"$', ''))) - OR (has(['val_2'], replaceRegexpAll(JSONExtractRaw(properties, 'attr_1'), '^"|"$', '')))) - ''' -# --- -# name: TestPropFormat.test_parse_groups_persons - ''' - SELECT uuid - FROM events - WHERE team_id = 99999 - AND ((distinct_id IN - (SELECT distinct_id - FROM - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) - WHERE person_id IN - (SELECT id - FROM - (SELECT id, - argMax(properties, person._timestamp) as properties, - max(is_deleted) as is_deleted - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING is_deleted = 0) - WHERE has(['1@posthog.com'], replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '')) ) )) - OR (distinct_id IN - (SELECT distinct_id - FROM - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) - WHERE person_id IN - (SELECT id - FROM - (SELECT id, - argMax(properties, person._timestamp) as properties, - max(is_deleted) as is_deleted - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING is_deleted = 0) - WHERE has(['2@posthog.com'], replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '')) ) ))) - ''' -# --- -# name: test_parse_groups_persons_edge_case_with_single_filter - tuple( - 'AND ( has(%(vglobalperson_0)s, "pmat_email"))', - dict({ - 'kglobalperson_0': 'email', - 'vglobalperson_0': list([ - '1@posthog.com', - ]), - }), - ) -# --- -# name: test_parse_prop_clauses_defaults - tuple( - ''' - AND ( has(%(vglobal_0)s, replaceRegexpAll(JSONExtractRaw(properties, %(kglobal_0)s), '^"|"$', '')) AND distinct_id IN ( - SELECT distinct_id - FROM ( - - SELECT distinct_id, argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = %(team_id)s - - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0 - - ) - WHERE person_id IN - ( - SELECT id - FROM ( - SELECT id, argMax(properties, person._timestamp) as properties, max(is_deleted) as is_deleted - FROM person - WHERE team_id = %(team_id)s - GROUP BY id - HAVING is_deleted = 0 - ) - WHERE 
replaceRegexpAll(JSONExtractRaw(properties, %(kglobalperson_1)s), '^"|"$', '') ILIKE %(vglobalperson_1)s - ) - )) - ''', - dict({ - 'kglobal_0': 'event_prop', - 'kglobalperson_1': 'email', - 'vglobal_0': list([ - 'value', - ]), - 'vglobalperson_1': '%posthog%', - }), - ) -# --- -# name: test_parse_prop_clauses_defaults.1 - tuple( - 'AND ( has(%(vglobal_0)s, replaceRegexpAll(JSONExtractRaw(properties, %(kglobal_0)s), \'^"|"$\', \'\')) AND replaceRegexpAll(JSONExtractRaw(person_props, %(kglobalperson_1)s), \'^"|"$\', \'\') ILIKE %(vglobalperson_1)s)', - dict({ - 'kglobal_0': 'event_prop', - 'kglobalperson_1': 'email', - 'vglobal_0': list([ - 'value', - ]), - 'vglobalperson_1': '%posthog%', - }), - ) -# --- -# name: test_parse_prop_clauses_defaults.2 - tuple( - 'AND ( has(%(vglobal_0)s, replaceRegexpAll(JSONExtractRaw(properties, %(kglobal_0)s), \'^"|"$\', \'\')) AND argMax(person."pmat_email", version) ILIKE %(vpersonquery_global_1)s)', - dict({ - 'kglobal_0': 'event_prop', - 'kpersonquery_global_1': 'email', - 'vglobal_0': list([ - 'value', - ]), - 'vpersonquery_global_1': '%posthog%', - }), - ) -# --- -# name: test_parse_prop_clauses_funnel_step_element_prepend_regression - tuple( - 'AND ( (match(elements_chain, %(PREPEND__text_0_attributes_regex)s)))', - dict({ - 'PREPEND__text_0_attributes_regex': '(text="Insights1")', - }), - ) -# --- -# name: test_parse_prop_clauses_precalculated_cohort - tuple( - ''' - AND ( pdi.person_id IN ( - SELECT DISTINCT person_id FROM cohortpeople WHERE team_id = %(team_id)s AND cohort_id = %(global_cohort_id_0)s AND version = %(global_version_0)s - )) - ''', - dict({ - 'global_cohort_id_0': 42, - 'global_version_0': None, - }), - ) -# --- diff --git a/ee/clickhouse/models/test/test_action.py b/ee/clickhouse/models/test/test_action.py deleted file mode 100644 index b9aaf44a4c..0000000000 --- a/ee/clickhouse/models/test/test_action.py +++ /dev/null @@ -1,318 +0,0 @@ -import dataclasses - -from posthog.client import sync_execute -from posthog.hogql.compiler.bytecode import create_bytecode -from posthog.hogql.hogql import HogQLContext -from posthog.hogql.property import action_to_expr -from posthog.models.action import Action -from posthog.models.action.util import filter_event, format_action_filter -from posthog.models.test.test_event_model import filter_by_actions_factory -from posthog.test.base import ( - BaseTest, - ClickhouseTestMixin, - _create_event, - _create_person, -) -from hogvm.python.operation import Operation as op, HOGQL_BYTECODE_IDENTIFIER as _H, HOGQL_BYTECODE_VERSION - - -@dataclasses.dataclass -class MockEvent: - uuid: str - distinct_id: str - - -def _get_events_for_action(action: Action) -> list[MockEvent]: - hogql_context = HogQLContext(team_id=action.team_id) - formatted_query, params = format_action_filter( - team_id=action.team_id, action=action, prepend="", hogql_context=hogql_context - ) - query = f""" - SELECT - events.uuid, - events.distinct_id - FROM events - WHERE {formatted_query} - AND events.team_id = %(team_id)s - ORDER BY events.timestamp DESC - """ - events = sync_execute( - query, - {"team_id": action.team_id, **params, **hogql_context.values}, - team_id=action.team_id, - ) - return [MockEvent(str(uuid), distinct_id) for uuid, distinct_id in events] - - -EVENT_UUID_QUERY = "SELECT uuid FROM events WHERE {} AND team_id = %(team_id)s" - - -class TestActions( - ClickhouseTestMixin, - filter_by_actions_factory(_create_event, _create_person, _get_events_for_action), # type: ignore -): - pass - - -class 
TestActionFormat(ClickhouseTestMixin, BaseTest): - def test_filter_event_exact_url(self): - event_target_uuid = _create_event( - event="$autocapture", - team=self.team, - distinct_id="whatever", - properties={"$current_url": "https://posthog.com/feedback/123"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="whatever", - properties={"$current_url": "https://posthog.com/feedback/123"}, - ) - - _create_event( - event="$autocapture", - team=self.team, - distinct_id="whatever", - properties={"$current_url": "https://posthog.com/feedback/1234"}, - ) - - action1 = Action.objects.create( - team=self.team, - name="action1", - steps_json=[ - { - "event": "$autocapture", - "url": "https://posthog.com/feedback/123", - "url_matching": "exact", - } - ], - ) - query, params = filter_event(action1.steps[0]) - - full_query = EVENT_UUID_QUERY.format(" AND ".join(query)) - result = sync_execute(full_query, {**params, "team_id": self.team.pk}, team_id=self.team.pk) - - self.assertEqual(len(result), 1) - self.assertCountEqual( - [str(r[0]) for r in result], - [event_target_uuid], - ) - - def test_filter_event_exact_url_with_query_params(self): - first_match_uuid = _create_event( - event="$autocapture", - team=self.team, - distinct_id="whatever", - properties={"$current_url": "https://posthog.com/feedback/123?vip=1"}, - ) - - second_match_uuid = _create_event( - event="$autocapture", - team=self.team, - distinct_id="whatever", - properties={"$current_url": "https://posthog.com/feedback/123?vip=1"}, - ) - - _create_event( - event="$autocapture", - team=self.team, - distinct_id="whatever", - properties={"$current_url": "https://posthog.com/feedback/123?vip=0"}, - ) - - action1 = Action.objects.create( - team=self.team, - name="action1", - steps_json=[ - { - "event": "$autocapture", - "url": "https://posthog.com/feedback/123?vip=1", - "url_matching": "exact", - } - ], - ) - query, params = filter_event(action1.steps[0]) - - full_query = EVENT_UUID_QUERY.format(" AND ".join(query)) - result = sync_execute(full_query, {**params, "team_id": self.team.pk}, team_id=self.team.pk) - - self.assertEqual(len(result), 2) - self.assertCountEqual( - [str(r[0]) for r in result], - [first_match_uuid, second_match_uuid], - ) - - def test_filter_event_contains_url(self): - _create_event( - event="$autocapture", - team=self.team, - distinct_id="whatever", - properties={"$current_url": "https://posthog.com/feedback/123"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="whatever", - properties={"$current_url": "https://posthog.com/feedback/123"}, - ) - - _create_event( - event="$autocapture", - team=self.team, - distinct_id="whatever", - properties={"$current_url": "https://posthog.com/feedback/1234"}, - ) - - action1 = Action.objects.create( - team=self.team, - name="action1", - steps_json=[{"event": "$autocapture", "url": "https://posthog.com/feedback/123"}], - ) - query, params = filter_event(action1.steps[0]) - - full_query = EVENT_UUID_QUERY.format(" AND ".join(query)) - result = sync_execute(full_query, {**params, "team_id": self.team.pk}, team_id=self.team.pk) - self.assertEqual(len(result), 2) - - def test_filter_event_regex_url(self): - _create_event( - event="$autocapture", - team=self.team, - distinct_id="whatever", - properties={"$current_url": "https://posthog.com/feedback/123"}, - ) - - _create_event( - event="$autocapture", - team=self.team, - distinct_id="whatever", - properties={"$current_url": "https://test.com/feedback"}, - ) - - _create_event( - 
event="$autocapture", - team=self.team, - distinct_id="whatever", - properties={"$current_url": "https://posthog.com/feedback/1234"}, - ) - - action1 = Action.objects.create( - team=self.team, - name="action1", - steps_json=[ - { - "event": "$autocapture", - "url": "/123", - "url_matching": "regex", - } - ], - ) - query, params = filter_event(action1.steps[0]) - - full_query = EVENT_UUID_QUERY.format(" AND ".join(query)) - result = sync_execute(full_query, {**params, "team_id": self.team.pk}, team_id=self.team.pk) - self.assertEqual(len(result), 2) - - def test_double(self): - # Tests a regression where the second step properties would override those of the first step, causing issues - _create_event( - event="insight viewed", - team=self.team, - distinct_id="whatever", - properties={"filters_count": 2}, - ) - - action1 = Action.objects.create( - team=self.team, - name="action1", - steps_json=[ - { - "event": "insight viewed", - "properties": [ - { - "key": "insight", - "type": "event", - "value": ["RETENTION"], - "operator": "exact", - } - ], - }, - { - "event": "insight viewed", - "properties": [ - { - "key": "filters_count", - "type": "event", - "value": "1", - "operator": "gt", - } - ], - }, - ], - ) - - events = _get_events_for_action(action1) - self.assertEqual(len(events), 1) - - def test_filter_with_hogql(self): - _create_event( - event="insight viewed", - team=self.team, - distinct_id="first", - properties={"filters_count": 20}, - ) - _create_event( - event="insight viewed", - team=self.team, - distinct_id="second", - properties={"filters_count": 1}, - ) - - action1 = Action.objects.create( - team=self.team, - name="action1", - steps_json=[ - { - "event": "insight viewed", - "properties": [{"key": "toInt(properties.filters_count) > 10", "type": "hogql"}], - } - ], - ) - - events = _get_events_for_action(action1) - self.assertEqual(len(events), 1) - - self.assertEqual(action1.bytecode, create_bytecode(action_to_expr(action1)).bytecode) - self.assertEqual( - action1.bytecode, - [ - _H, - HOGQL_BYTECODE_VERSION, - # event = 'insight viewed' - op.STRING, - "insight viewed", - op.STRING, - "event", - op.GET_GLOBAL, - 1, - op.EQ, - # toInt(properties.filters_count) > 10 - op.INTEGER, - 10, - op.STRING, - "filters_count", - op.STRING, - "properties", - op.GET_GLOBAL, - 2, - op.CALL_GLOBAL, - "toInt", - 1, - op.GT, - # and - op.AND, - 2, - ], - ) diff --git a/ee/clickhouse/models/test/test_cohort.py b/ee/clickhouse/models/test/test_cohort.py deleted file mode 100644 index 1600584169..0000000000 --- a/ee/clickhouse/models/test/test_cohort.py +++ /dev/null @@ -1,1449 +0,0 @@ -from datetime import datetime, timedelta -from typing import Optional - -from django.utils import timezone -from freezegun import freeze_time - -from posthog.client import sync_execute -from posthog.hogql.hogql import HogQLContext -from posthog.models.action import Action -from posthog.models.cohort import Cohort -from posthog.models.cohort.sql import GET_COHORTPEOPLE_BY_COHORT_ID -from posthog.models.cohort.util import format_filter_query -from posthog.models.filters import Filter -from posthog.models.organization import Organization -from posthog.models.person import Person -from posthog.models.property.util import parse_prop_grouped_clauses -from posthog.models.team import Team -from posthog.queries.person_distinct_id_query import get_team_distinct_ids_query -from posthog.queries.util import PersonPropertiesMode -from posthog.schema import PersonsOnEventsMode -from posthog.test.base import ( - BaseTest, - 
ClickhouseTestMixin, - _create_event, - _create_person, - flush_persons_and_events, - snapshot_clickhouse_insert_cohortpeople_queries, - snapshot_clickhouse_queries, -) -from posthog.models.person.sql import GET_LATEST_PERSON_SQL, GET_PERSON_IDS_BY_FILTER - - -def _create_action(**kwargs): - team = kwargs.pop("team") - name = kwargs.pop("name") - action = Action.objects.create(team=team, name=name, steps_json=[{"event": name}]) - return action - - -def get_person_ids_by_cohort_id( - team_id: int, - cohort_id: int, - limit: Optional[int] = None, - offset: Optional[int] = None, -): - from posthog.models.property.util import parse_prop_grouped_clauses - - filter = Filter(data={"properties": [{"key": "id", "value": cohort_id, "type": "cohort"}]}) - filter_query, filter_params = parse_prop_grouped_clauses( - team_id=team_id, - property_group=filter.property_groups, - table_name="pdi", - hogql_context=filter.hogql_context, - ) - - results = sync_execute( - GET_PERSON_IDS_BY_FILTER.format( - person_query=GET_LATEST_PERSON_SQL, - distinct_query=filter_query, - query="", - GET_TEAM_PERSON_DISTINCT_IDS=get_team_distinct_ids_query(team_id), - offset="OFFSET %(offset)s" if offset else "", - limit="ORDER BY _timestamp ASC LIMIT %(limit)s" if limit else "", - ), - {**filter_params, "team_id": team_id, "offset": offset, "limit": limit}, - ) - - return [str(row[0]) for row in results] - - -class TestCohort(ClickhouseTestMixin, BaseTest): - def _get_cohortpeople(self, cohort: Cohort, *, team_id: Optional[int] = None): - team_id = team_id or cohort.team_id - return sync_execute( - GET_COHORTPEOPLE_BY_COHORT_ID, - { - "team_id": team_id, - "cohort_id": cohort.pk, - "version": cohort.version, - }, - ) - - def test_prop_cohort_basic(self): - _create_person( - distinct_ids=["some_other_id"], - team_id=self.team.pk, - properties={"$some_prop": "something"}, - ) - - _create_person( - distinct_ids=["some_id"], - team_id=self.team.pk, - properties={"$some_prop": "something", "$another_prop": "something"}, - ) - _create_person(distinct_ids=["no_match"], team_id=self.team.pk) - _create_event( - event="$pageview", - team=self.team, - distinct_id="some_id", - properties={"attr": "some_val"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="some_other_id", - properties={"attr": "some_val"}, - ) - - cohort1 = Cohort.objects.create( - team=self.team, - groups=[ - { - "properties": [ - {"key": "$some_prop", "value": "something", "type": "person"}, - { - "key": "$another_prop", - "value": "something", - "type": "person", - }, - ] - } - ], - name="cohort1", - ) - - filter = Filter(data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}]}) - query, params = parse_prop_grouped_clauses( - team_id=self.team.pk, - property_group=filter.property_groups, - hogql_context=filter.hogql_context, - ) - final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query) - result = sync_execute( - final_query, - {**params, **filter.hogql_context.values, "team_id": self.team.pk}, - ) - self.assertEqual(len(result), 1) - - def test_prop_cohort_basic_action(self): - _create_person( - distinct_ids=["some_other_id"], - team_id=self.team.pk, - properties={"$some_prop": "something"}, - ) - - _create_person( - distinct_ids=["some_id"], - team_id=self.team.pk, - properties={"$some_prop": "something", "$another_prop": "something"}, - ) - _create_person(distinct_ids=["no_match"], team_id=self.team.pk) - - action = _create_action(team=self.team, name="$pageview") - _create_event( - 
event="$pageview", - team=self.team, - distinct_id="some_id", - properties={"attr": "some_val"}, - timestamp=datetime.now() - timedelta(days=1), - ) - - _create_event( - event="$not_pageview", - team=self.team, - distinct_id="some_other_id", - properties={"attr": "some_val"}, - timestamp=datetime.now() - timedelta(days=2), - ) - - cohort1 = Cohort.objects.create(team=self.team, groups=[{"action_id": action.pk, "days": 3}], name="cohort1") - - filter = Filter( - data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}]}, - team=self.team, - ) - query, params = parse_prop_grouped_clauses( - team_id=self.team.pk, - property_group=filter.property_groups, - person_properties_mode=( - PersonPropertiesMode.USING_SUBQUERY - if self.team.person_on_events_mode == PersonsOnEventsMode.DISABLED - else PersonPropertiesMode.DIRECT_ON_EVENTS - ), - hogql_context=filter.hogql_context, - ) - final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query) - result = sync_execute( - final_query, - {**params, **filter.hogql_context.values, "team_id": self.team.pk}, - ) - - self.assertEqual(len(result), 1) - - def test_prop_cohort_basic_event_days(self): - _create_person( - distinct_ids=["some_other_id"], - team_id=self.team.pk, - properties={"$some_prop": "something"}, - ) - - _create_person( - distinct_ids=["some_id"], - team_id=self.team.pk, - properties={"$some_prop": "something", "$another_prop": "something"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="some_id", - properties={"attr": "some_val"}, - timestamp=datetime.now() - timedelta(days=0, hours=12), - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="some_other_id", - properties={"attr": "some_val"}, - timestamp=datetime.now() - timedelta(days=4, hours=12), - ) - - cohort1 = Cohort.objects.create( - team=self.team, - groups=[{"event_id": "$pageview", "days": 1}], - name="cohort1", - ) - - filter = Filter( - data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}]}, - team=self.team, - ) - query, params = parse_prop_grouped_clauses( - team_id=self.team.pk, - property_group=filter.property_groups, - person_properties_mode=( - PersonPropertiesMode.USING_SUBQUERY - if self.team.person_on_events_mode == PersonsOnEventsMode.DISABLED - else PersonPropertiesMode.DIRECT_ON_EVENTS - ), - hogql_context=filter.hogql_context, - ) - final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query) - result = sync_execute( - final_query, - {**params, **filter.hogql_context.values, "team_id": self.team.pk}, - ) - self.assertEqual(len(result), 1) - - cohort2 = Cohort.objects.create( - team=self.team, - groups=[{"event_id": "$pageview", "days": 7}], - name="cohort2", - ) - - filter = Filter( - data={"properties": [{"key": "id", "value": cohort2.pk, "type": "cohort"}]}, - team=self.team, - ) - query, params = parse_prop_grouped_clauses( - team_id=self.team.pk, - property_group=filter.property_groups, - person_properties_mode=( - PersonPropertiesMode.USING_SUBQUERY - if self.team.person_on_events_mode == PersonsOnEventsMode.DISABLED - else PersonPropertiesMode.DIRECT_ON_EVENTS - ), - hogql_context=filter.hogql_context, - ) - final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query) - result = sync_execute( - final_query, - {**params, **filter.hogql_context.values, "team_id": self.team.pk}, - ) - self.assertEqual(len(result), 2) - - def test_prop_cohort_basic_action_days(self): - _create_person( - 
distinct_ids=["some_other_id"], - team_id=self.team.pk, - properties={"$some_prop": "something"}, - ) - - _create_person( - distinct_ids=["some_id"], - team_id=self.team.pk, - properties={"$some_prop": "something", "$another_prop": "something"}, - ) - - action = _create_action(team=self.team, name="$pageview") - _create_event( - event="$pageview", - team=self.team, - distinct_id="some_id", - properties={"attr": "some_val"}, - timestamp=datetime.now() - timedelta(hours=22), - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="some_other_id", - properties={"attr": "some_val"}, - timestamp=datetime.now() - timedelta(days=5), - ) - - cohort1 = Cohort.objects.create(team=self.team, groups=[{"action_id": action.pk, "days": 1}], name="cohort1") - - filter = Filter( - data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}]}, - team=self.team, - ) - query, params = parse_prop_grouped_clauses( - team_id=self.team.pk, - property_group=filter.property_groups, - person_properties_mode=( - PersonPropertiesMode.USING_SUBQUERY - if self.team.person_on_events_mode == PersonsOnEventsMode.DISABLED - else PersonPropertiesMode.DIRECT_ON_EVENTS - ), - hogql_context=filter.hogql_context, - ) - final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query) - result = sync_execute( - final_query, - {**params, **filter.hogql_context.values, "team_id": self.team.pk}, - ) - self.assertEqual(len(result), 1) - - cohort2 = Cohort.objects.create(team=self.team, groups=[{"action_id": action.pk, "days": 7}], name="cohort2") - - filter = Filter( - data={"properties": [{"key": "id", "value": cohort2.pk, "type": "cohort"}]}, - team=self.team, - ) - query, params = parse_prop_grouped_clauses( - team_id=self.team.pk, - property_group=filter.property_groups, - person_properties_mode=( - PersonPropertiesMode.USING_SUBQUERY - if self.team.person_on_events_mode == PersonsOnEventsMode.DISABLED - else PersonPropertiesMode.DIRECT_ON_EVENTS - ), - hogql_context=filter.hogql_context, - ) - final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query) - result = sync_execute( - final_query, - {**params, **filter.hogql_context.values, "team_id": self.team.pk}, - ) - self.assertEqual(len(result), 2) - - def test_prop_cohort_multiple_groups(self): - _create_person( - distinct_ids=["some_other_id"], - team_id=self.team.pk, - properties={"$some_prop": "something"}, - ) - - _create_person( - distinct_ids=["some_id"], - team_id=self.team.pk, - properties={"$another_prop": "something"}, - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id="some_id", - properties={"attr": "some_val"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="some_other_id", - properties={"attr": "some_val"}, - ) - - cohort1 = Cohort.objects.create( - team=self.team, - groups=[ - {"properties": [{"key": "$some_prop", "value": "something", "type": "person"}]}, - {"properties": [{"key": "$another_prop", "value": "something", "type": "person"}]}, - ], - name="cohort1", - ) - - filter = Filter( - data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}]}, - team=self.team, - ) - query, params = parse_prop_grouped_clauses( - team_id=self.team.pk, - property_group=filter.property_groups, - hogql_context=filter.hogql_context, - ) - final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query) - result = sync_execute( - final_query, - {**params, **filter.hogql_context.values, "team_id": self.team.pk}, - ) - 
self.assertEqual(len(result), 2) - - def test_prop_cohort_with_negation(self): - team2 = Organization.objects.bootstrap(None)[2] - - _create_person( - distinct_ids=["some_other_id"], - team_id=self.team.pk, - properties={"$some_prop": "something"}, - ) - - _create_person( - distinct_ids=["some_id"], - team_id=team2.pk, - properties={"$another_prop": "something"}, - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id="some_id", - properties={"attr": "some_val"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="some_other_id", - properties={"attr": "some_val"}, - ) - - cohort1 = Cohort.objects.create( - team=self.team, - groups=[ - { - "properties": [ - { - "type": "person", - "key": "$some_prop", - "operator": "is_not", - "value": "something", - } - ] - } - ], - name="cohort1", - ) - - filter = Filter( - data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}]}, - team=self.team, - ) - query, params = parse_prop_grouped_clauses( - team_id=self.team.pk, - property_group=filter.property_groups, - hogql_context=filter.hogql_context, - ) - final_query = "SELECT uuid FROM events WHERE team_id = %(team_id)s {}".format(query) - self.assertIn("\nFROM person_distinct_id2\n", final_query) - - result = sync_execute( - final_query, - {**params, **filter.hogql_context.values, "team_id": self.team.pk}, - ) - self.assertEqual(len(result), 0) - - def test_cohort_get_person_ids_by_cohort_id(self): - user1 = _create_person( - distinct_ids=["user1"], - team_id=self.team.pk, - properties={"$some_prop": "something"}, - ) - _create_person( - distinct_ids=["user2"], - team_id=self.team.pk, - properties={"$some_prop": "another"}, - ) - user3 = _create_person( - distinct_ids=["user3"], - team_id=self.team.pk, - properties={"$some_prop": "something"}, - ) - cohort = Cohort.objects.create( - team=self.team, - groups=[{"properties": [{"key": "$some_prop", "value": "something", "type": "person"}]}], - name="cohort1", - ) - - results = get_person_ids_by_cohort_id(self.team.pk, cohort.id) - self.assertEqual(len(results), 2) - self.assertIn(str(user1.uuid), results) - self.assertIn(str(user3.uuid), results) - - def test_insert_by_distinct_id_or_email(self): - Person.objects.create(team_id=self.team.pk, distinct_ids=["1"]) - Person.objects.create(team_id=self.team.pk, distinct_ids=["123"]) - Person.objects.create(team_id=self.team.pk, distinct_ids=["2"]) - # Team leakage - team2 = Team.objects.create(organization=self.organization) - Person.objects.create(team=team2, distinct_ids=["1"]) - - cohort = Cohort.objects.create(team=self.team, groups=[], is_static=True) - cohort.insert_users_by_list(["1", "123"]) - cohort = Cohort.objects.get() - results = get_person_ids_by_cohort_id(self.team.pk, cohort.id) - self.assertEqual(len(results), 2) - self.assertEqual(cohort.is_calculating, False) - - # test SQLi - Person.objects.create(team_id=self.team.pk, distinct_ids=["'); truncate person_static_cohort; --"]) - cohort.insert_users_by_list(["'); truncate person_static_cohort; --", "123"]) - results = sync_execute( - "select count(1) from person_static_cohort where team_id = %(team_id)s", - {"team_id": self.team.pk}, - )[0][0] - self.assertEqual(results, 3) - - # Β If we accidentally call calculate_people it shouldn't erase people - cohort.calculate_people_ch(pending_version=0) - results = get_person_ids_by_cohort_id(self.team.pk, cohort.id) - self.assertEqual(len(results), 3) - - # if we add people again, don't increase the number of people in cohort - 
cohort.insert_users_by_list(["123"]) - results = get_person_ids_by_cohort_id(self.team.pk, cohort.id) - self.assertEqual(len(results), 3) - - @snapshot_clickhouse_insert_cohortpeople_queries - def test_cohortpeople_basic(self): - Person.objects.create( - team_id=self.team.pk, - distinct_ids=["1"], - properties={"$some_prop": "something", "$another_prop": "something"}, - ) - Person.objects.create( - team_id=self.team.pk, - distinct_ids=["2"], - properties={"$some_prop": "something", "$another_prop": "something"}, - ) - - cohort1 = Cohort.objects.create( - team=self.team, - groups=[ - { - "properties": [ - {"key": "$some_prop", "value": "something", "type": "person"}, - { - "key": "$another_prop", - "value": "something", - "type": "person", - }, - ] - } - ], - name="cohort1", - ) - - cohort1.calculate_people_ch(pending_version=0) - - results = self._get_cohortpeople(cohort1) - self.assertEqual(len(results), 2) - - def test_cohortpeople_action_basic(self): - action = _create_action(team=self.team, name="$pageview") - Person.objects.create( - team_id=self.team.pk, - distinct_ids=["1"], - properties={"$some_prop": "something", "$another_prop": "something"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="1", - properties={"attr": "some_val"}, - timestamp=datetime.now() - timedelta(hours=12), - ) - - Person.objects.create( - team_id=self.team.pk, - distinct_ids=["2"], - properties={"$some_prop": "something", "$another_prop": "something"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="2", - properties={"attr": "some_val"}, - timestamp=datetime.now() - timedelta(hours=12), - ) - - cohort1 = Cohort.objects.create(team=self.team, groups=[{"action_id": action.pk, "days": 1}], name="cohort1") - cohort1.calculate_people_ch(pending_version=0) - - results = self._get_cohortpeople(cohort1) - self.assertEqual(len(results), 2) - - cohort2 = Cohort.objects.create(team=self.team, groups=[{"action_id": action.pk, "days": 1}], name="cohort2") - cohort2.calculate_people_ch(pending_version=0) - - results = self._get_cohortpeople(cohort2) - self.assertEqual(len(results), 2) - - def _setup_actions_with_different_counts(self): - action = _create_action(team=self.team, name="$pageview") - Person.objects.create( - team_id=self.team.pk, - distinct_ids=["1"], - properties={"$some_prop": "something", "$another_prop": "something"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="1", - properties={"attr": "some_val"}, - timestamp=datetime.now() - timedelta(days=1, hours=12), - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id="1", - properties={"attr": "some_val"}, - timestamp=datetime.now() - timedelta(days=0, hours=12), - ) - - Person.objects.create( - team_id=self.team.pk, - distinct_ids=["2"], - properties={"$some_prop": "something", "$another_prop": "something"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="2", - properties={"attr": "some_val"}, - timestamp=datetime.now() - timedelta(days=1, hours=12), - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="2", - properties={"attr": "some_val"}, - timestamp=datetime.now() - timedelta(days=0, hours=12), - ) - - Person.objects.create( - team_id=self.team.pk, - distinct_ids=["3"], - properties={"$some_prop": "something", "$another_prop": "something"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="3", - properties={"attr": "some_val"}, - timestamp=datetime.now() - 
timedelta(days=0, hours=12), - ) - - Person.objects.create( - team_id=self.team.pk, - distinct_ids=["4"], - properties={"$some_prop": "something", "$another_prop": "something"}, - ) - - Person.objects.create( - team_id=self.team.pk, - distinct_ids=["5"], - properties={"$some_prop": "something", "$another_prop": "something"}, - ) - return action - - def test_cohortpeople_action_count(self): - action = self._setup_actions_with_different_counts() - - # test operators - cohort1 = Cohort.objects.create( - team=self.team, - groups=[{"action_id": action.pk, "days": 3, "count": 2, "count_operator": "gte"}], - name="cohort1", - ) - cohort1.calculate_people_ch(pending_version=0) - - results = self._get_cohortpeople(cohort1) - self.assertEqual(len(results), 2) - - cohort2 = Cohort.objects.create( - team=self.team, - groups=[{"action_id": action.pk, "days": 3, "count": 1, "count_operator": "lte"}], - name="cohort2", - ) - cohort2.calculate_people_ch(pending_version=0) - - results = self._get_cohortpeople(cohort2) - self.assertEqual(len(results), 1) - - cohort3 = Cohort.objects.create( - team=self.team, - groups=[{"action_id": action.pk, "days": 3, "count": 1, "count_operator": "eq"}], - name="cohort3", - ) - cohort3.calculate_people_ch(pending_version=0) - - results = self._get_cohortpeople(cohort3) - self.assertEqual(len(results), 1) - - def test_cohortpeople_deleted_person(self): - Person.objects.create( - team_id=self.team.pk, - distinct_ids=["1"], - properties={"$some_prop": "something", "$another_prop": "something"}, - ) - p2 = Person.objects.create( - team_id=self.team.pk, - distinct_ids=["2"], - properties={"$some_prop": "something", "$another_prop": "something"}, - ) - - cohort1 = Cohort.objects.create( - team=self.team, - groups=[ - { - "properties": [ - {"key": "$some_prop", "value": "something", "type": "person"}, - { - "key": "$another_prop", - "value": "something", - "type": "person", - }, - ] - } - ], - name="cohort1", - ) - - cohort1.calculate_people_ch(pending_version=0) - p2.delete() - cohort1.calculate_people_ch(pending_version=0) - - def test_cohortpeople_prop_changed(self): - with freeze_time((datetime.now() - timedelta(days=3)).strftime("%Y-%m-%d")): - p1 = Person.objects.create( - team_id=self.team.pk, - distinct_ids=["1"], - properties={"$some_prop": "something", "$another_prop": "something"}, - ) - p2 = Person.objects.create( - team_id=self.team.pk, - distinct_ids=["2"], - properties={"$some_prop": "something", "$another_prop": "something"}, - ) - - cohort1 = Cohort.objects.create( - team=self.team, - groups=[ - { - "properties": [ - { - "key": "$some_prop", - "value": "something", - "type": "person", - }, - { - "key": "$another_prop", - "value": "something", - "type": "person", - }, - ] - } - ], - name="cohort1", - ) - - cohort1.calculate_people_ch(pending_version=0) - - with freeze_time((datetime.now() - timedelta(days=2)).strftime("%Y-%m-%d")): - p2.version = 1 - p2.properties = ({"$some_prop": "another", "$another_prop": "another"},) - p2.save() - - cohort1.calculate_people_ch(pending_version=1) - - results = self._get_cohortpeople(cohort1) - - self.assertEqual(len(results), 1) - self.assertEqual(results[0][0], p1.uuid) - - def test_cohort_change(self): - p1 = Person.objects.create( - team_id=self.team.pk, - distinct_ids=["1"], - properties={"$some_prop": "something", "$another_prop": "something"}, - ) - p2 = Person.objects.create( - team_id=self.team.pk, - distinct_ids=["2"], - properties={"$some_prop": "another", "$another_prop": "another"}, - ) - - cohort1 = 
Cohort.objects.create( - team=self.team, - groups=[ - { - "properties": [ - {"key": "$some_prop", "value": "something", "type": "person"}, - { - "key": "$another_prop", - "value": "something", - "type": "person", - }, - ] - } - ], - name="cohort1", - ) - cohort1.calculate_people_ch(pending_version=0) - results = self._get_cohortpeople(cohort1) - - self.assertEqual(len(results), 1) - self.assertEqual(results[0][0], p1.uuid) - - cohort1.groups = [ - { - "properties": [ - {"key": "$some_prop", "value": "another", "type": "person"}, - {"key": "$another_prop", "value": "another", "type": "person"}, - ] - } - ] - cohort1.save() - - cohort1.calculate_people_ch(pending_version=1) - - results = self._get_cohortpeople(cohort1) - self.assertEqual(len(results), 1) - self.assertEqual(results[0][0], p2.uuid) - - def test_static_cohort_precalculated(self): - Person.objects.create(team_id=self.team.pk, distinct_ids=["1"]) - Person.objects.create(team_id=self.team.pk, distinct_ids=["123"]) - Person.objects.create(team_id=self.team.pk, distinct_ids=["2"]) - # Team leakage - team2 = Team.objects.create(organization=self.organization) - Person.objects.create(team=team2, distinct_ids=["1"]) - - cohort = Cohort.objects.create(team=self.team, groups=[], is_static=True, last_calculation=timezone.now()) - cohort.insert_users_by_list(["1", "123"]) - - cohort.calculate_people_ch(pending_version=0) - - with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True): - sql, _ = format_filter_query(cohort, 0, HogQLContext(team_id=self.team.pk)) - self.assertQueryMatchesSnapshot(sql) - - def test_cohortpeople_with_valid_other_cohort_filter(self): - Person.objects.create(team_id=self.team.pk, distinct_ids=["1"], properties={"foo": "bar"}) - Person.objects.create(team_id=self.team.pk, distinct_ids=["2"], properties={"foo": "non"}) - - cohort0: Cohort = Cohort.objects.create( - team=self.team, - groups=[{"properties": [{"key": "foo", "value": "bar", "type": "person"}]}], - name="cohort0", - ) - cohort0.calculate_people_ch(pending_version=0) - - cohort1: Cohort = Cohort.objects.create( - team=self.team, - groups=[{"properties": [{"key": "id", "type": "cohort", "value": cohort0.id}]}], - name="cohort1", - ) - - cohort1.calculate_people_ch(pending_version=0) - - res = self._get_cohortpeople(cohort1) - self.assertEqual(len(res), 1) - - @snapshot_clickhouse_insert_cohortpeople_queries - def test_cohortpeople_with_not_in_cohort_operator(self): - _create_person( - distinct_ids=["1"], - team_id=self.team.pk, - properties={"$some_prop": "something1"}, - ) - _create_person( - distinct_ids=["2"], - team_id=self.team.pk, - properties={"$some_prop": "something2"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="1", - properties={"attr": "some_val"}, - timestamp=datetime.now() - timedelta(days=10), - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id="2", - properties={"attr": "some_val"}, - timestamp=datetime.now() - timedelta(days=20), - ) - - flush_persons_and_events() - - cohort0: Cohort = Cohort.objects.create( - team=self.team, - groups=[{"properties": [{"key": "$some_prop", "value": "something1", "type": "person"}]}], - name="cohort0", - ) - cohort0.calculate_people_ch(pending_version=0) - - cohort1 = Cohort.objects.create( - team=self.team, - filters={ - "properties": { - "type": "AND", - "values": [ - { - "event_type": "events", - "key": "$pageview", - "negation": False, - "time_interval": "year", - "time_value": 2, - "type": "behavioral", - "value": "performed_event", - }, - { - 
"key": "id", - "negation": True, - "type": "cohort", - "value": cohort0.pk, - }, - ], - } - }, - name="cohort1", - ) - - cohort1.calculate_people_ch(pending_version=0) - - with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True): - filter = Filter( - data={ - "properties": [ - { - "key": "id", - "value": cohort1.pk, - "type": "precalculated-cohort", - } - ] - }, - team=self.team, - ) - query, params = parse_prop_grouped_clauses( - team_id=self.team.pk, - property_group=filter.property_groups, - hogql_context=filter.hogql_context, - ) - final_query = "SELECT uuid, distinct_id FROM events WHERE team_id = %(team_id)s {}".format(query) - - result = sync_execute( - final_query, - {**params, **filter.hogql_context.values, "team_id": self.team.pk}, - ) - - self.assertEqual(len(result), 1) - self.assertEqual(result[0][1], "2") # distinct_id '2' is the one in cohort - - @snapshot_clickhouse_queries - def test_cohortpeople_with_not_in_cohort_operator_and_no_precalculation(self): - _create_person( - distinct_ids=["1"], - team_id=self.team.pk, - properties={"$some_prop": "something1"}, - ) - _create_person( - distinct_ids=["2"], - team_id=self.team.pk, - properties={"$some_prop": "something2"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="1", - properties={"attr": "some_val"}, - timestamp=datetime.now() - timedelta(days=10), - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id="2", - properties={"attr": "some_val"}, - timestamp=datetime.now() - timedelta(days=20), - ) - - flush_persons_and_events() - - cohort0: Cohort = Cohort.objects.create( - team=self.team, - groups=[{"properties": [{"key": "$some_prop", "value": "something1", "type": "person"}]}], - name="cohort0", - ) - - cohort1 = Cohort.objects.create( - team=self.team, - filters={ - "properties": { - "type": "AND", - "values": [ - { - "event_type": "events", - "key": "$pageview", - "negation": False, - "time_interval": "year", - "time_value": 2, - "type": "behavioral", - "value": "performed_event", - }, - { - "key": "id", - "negation": True, - "type": "cohort", - "value": cohort0.pk, - }, - ], - } - }, - name="cohort1", - ) - - filter = Filter( - data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}]}, - team=self.team, - ) - query, params = parse_prop_grouped_clauses( - team_id=self.team.pk, - property_group=filter.property_groups, - hogql_context=filter.hogql_context, - ) - final_query = "SELECT uuid, distinct_id FROM events WHERE team_id = %(team_id)s {}".format(query) - self.assertIn("\nFROM person_distinct_id2\n", final_query) - - result = sync_execute( - final_query, - {**params, **filter.hogql_context.values, "team_id": self.team.pk}, - ) - self.assertEqual(len(result), 1) - self.assertEqual(result[0][1], "2") # distinct_id '2' is the one in cohort - - @snapshot_clickhouse_insert_cohortpeople_queries - def test_cohortpeople_with_not_in_cohort_operator_for_behavioural_cohorts(self): - _create_person( - distinct_ids=["1"], - team_id=self.team.pk, - properties={"$some_prop": "something"}, - ) - _create_person( - distinct_ids=["2"], - team_id=self.team.pk, - properties={"$some_prop": "something"}, - ) - - _create_event( - event="signup", - team=self.team, - distinct_id="1", - properties={"attr": "some_val"}, - timestamp=datetime.now() - timedelta(days=10), - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id="1", - properties={"attr": "some_val"}, - timestamp=datetime.now() - timedelta(days=10), - ) - _create_event( - event="$pageview", - 
team=self.team, - distinct_id="2", - properties={"attr": "some_val"}, - timestamp=datetime.now() - timedelta(days=20), - ) - flush_persons_and_events() - - cohort0: Cohort = Cohort.objects.create( - team=self.team, - groups=[ - { - "properties": [ - { - "event_type": "events", - "key": "signup", - "negation": False, - "time_interval": "day", - "time_value": 15, - "type": "behavioral", - "value": "performed_event_first_time", - }, - ] - } - ], - name="cohort0", - ) - cohort0.calculate_people_ch(pending_version=0) - - cohort1 = Cohort.objects.create( - team=self.team, - filters={ - "properties": { - "type": "AND", - "values": [ - { - "event_type": "events", - "key": "$pageview", - "negation": False, - "time_interval": "year", - "time_value": 2, - "type": "behavioral", - "value": "performed_event", - }, - { - "key": "id", - "negation": True, - "type": "cohort", - "value": cohort0.pk, - }, - ], - } - }, - name="cohort1", - ) - - cohort1.calculate_people_ch(pending_version=0) - - with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True): - filter = Filter( - data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}]}, - team=self.team, - ) - query, params = parse_prop_grouped_clauses( - team_id=self.team.pk, - property_group=filter.property_groups, - hogql_context=filter.hogql_context, - ) - final_query = "SELECT uuid, distinct_id FROM events WHERE team_id = %(team_id)s {}".format(query) - - result = sync_execute( - final_query, - {**params, **filter.hogql_context.values, "team_id": self.team.pk}, - ) - - self.assertEqual(len(result), 1) - self.assertEqual(result[0][1], "2") # distinct_id '2' is the one in cohort - - def test_cohortpeople_with_nonexistent_other_cohort_filter(self): - Person.objects.create(team_id=self.team.pk, distinct_ids=["1"], properties={"foo": "bar"}) - Person.objects.create(team_id=self.team.pk, distinct_ids=["2"], properties={"foo": "non"}) - - cohort1: Cohort = Cohort.objects.create( - team=self.team, - groups=[{"properties": [{"key": "id", "type": "cohort", "value": 666}]}], - name="cohort1", - ) - - cohort1.calculate_people_ch(pending_version=0) - - res = self._get_cohortpeople(cohort1) - self.assertEqual(len(res), 0) - - def test_clickhouse_empty_query(self): - cohort2 = Cohort.objects.create( - team=self.team, - groups=[{"properties": [{"key": "$some_prop", "value": "nomatchihope", "type": "person"}]}], - name="cohort1", - ) - - cohort2.calculate_people_ch(pending_version=0) - self.assertFalse(Cohort.objects.get().is_calculating) - - def test_query_with_multiple_new_style_cohorts(self): - action1 = Action.objects.create( - team=self.team, - name="action1", - steps_json=[ - { - "event": "$autocapture", - "url": "https://posthog.com/feedback/123", - "url_matching": "exact", - } - ], - ) - - # satiesfies all conditions - p1 = Person.objects.create( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$autocapture", - properties={"$current_url": "https://posthog.com/feedback/123"}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=2), - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=1), - ) - - # doesn't satisfy action - Person.objects.create( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$autocapture", - properties={"$current_url": 
"https://posthog.com/feedback/123"}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(weeks=3), - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=1), - ) - - # satisfies special condition (not pushed down person property in OR group) - p3 = Person.objects.create( - team_id=self.team.pk, - distinct_ids=["p3"], - properties={"name": "special", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$autocapture", - properties={"$current_url": "https://posthog.com/feedback/123"}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(days=2), - ) - - cohort2 = Cohort.objects.create( - team=self.team, - filters={ - "properties": { - "type": "AND", - "values": [ - { - "key": action1.pk, - "event_type": "actions", - "time_value": 2, - "time_interval": "week", - "value": "performed_event_first_time", - "type": "behavioral", - }, - { - "key": "email", - "value": "test@posthog.com", - "type": "person", - }, # this is pushed down - ], - } - }, - name="cohort2", - ) - - cohort1 = Cohort.objects.create( - team=self.team, - filters={ - "properties": { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "time_value": 1, - "time_interval": "day", - "value": "performed_event", - "type": "behavioral", - }, - { - "key": "$pageview", - "event_type": "events", - "time_value": 2, - "time_interval": "week", - "value": "performed_event", - "type": "behavioral", - }, - { - "key": "name", - "value": "special", - "type": "person", - }, # this is NOT pushed down - ], - }, - { - "type": "AND", - "values": [{"key": "id", "value": cohort2.pk, "type": "cohort"}], - }, - ], - } - }, - name="cohort1", - ) - - cohort1.calculate_people_ch(pending_version=0) - - result = self._get_cohortpeople(cohort1) - self.assertCountEqual([p1.uuid, p3.uuid], [r[0] for r in result]) - - def test_update_cohort(self): - Person.objects.create( - team_id=self.team.pk, - distinct_ids=["1"], - properties={"$some_prop": "something"}, - ) - Person.objects.create( - team_id=self.team.pk, - distinct_ids=["2"], - properties={"$another_prop": "something"}, - ) - Person.objects.create( - team_id=self.team.pk, - distinct_ids=["3"], - properties={"$another_prop": "something"}, - ) - - cohort1 = Cohort.objects.create( - team=self.team, - groups=[{"properties": [{"key": "$some_prop", "value": "something", "type": "person"}]}], - name="cohort1", - ) - - cohort1.calculate_people_ch(pending_version=0) - - # Should only have p1 in this cohort - results = self._get_cohortpeople(cohort1) - self.assertEqual(len(results), 1) - - cohort1.groups = [{"properties": [{"key": "$another_prop", "value": "something", "type": "person"}]}] - cohort1.save() - cohort1.calculate_people_ch(pending_version=1) - - # Should only have p2, p3 in this cohort - results = self._get_cohortpeople(cohort1) - self.assertEqual(len(results), 2) - - cohort1.groups = [{"properties": [{"key": "$some_prop", "value": "something", "type": "person"}]}] - cohort1.save() - cohort1.calculate_people_ch(pending_version=2) - - # Should only have p1 again in this cohort - results = self._get_cohortpeople(cohort1) - self.assertEqual(len(results), 1) - - def test_cohort_versioning(self): - Person.objects.create( - team_id=self.team.pk, - distinct_ids=["1"], - properties={"$some_prop": "something"}, - ) - Person.objects.create( - team_id=self.team.pk, - distinct_ids=["2"], - properties={"$another_prop": "something"}, 
- ) - Person.objects.create( - team_id=self.team.pk, - distinct_ids=["3"], - properties={"$another_prop": "something"}, - ) - - # start the cohort at some later version - cohort1 = Cohort.objects.create( - team=self.team, - groups=[{"properties": [{"key": "$some_prop", "value": "something", "type": "person"}]}], - name="cohort1", - ) - - cohort1.calculate_people_ch(pending_version=5) - - cohort1.pending_version = 5 - cohort1.version = 5 - cohort1.save() - - # Should have p1 in this cohort even if version is different - results = self._get_cohortpeople(cohort1) - self.assertEqual(len(results), 1) - - def test_calculate_people_ch_in_multiteam_project(self): - # Create another team in the same project - team2 = Team.objects.create(organization=self.organization, project=self.team.project) - - # Create people in team 1 - _person1_team1 = _create_person( - team_id=self.team.pk, - distinct_ids=["person1"], - properties={"$some_prop": "else"}, - ) - person2_team1 = _create_person( - team_id=self.team.pk, - distinct_ids=["person2"], - properties={"$some_prop": "something"}, - ) - # Create people in team 2 with same property - person1_team2 = _create_person( - team_id=team2.pk, - distinct_ids=["person1_team2"], - properties={"$some_prop": "something"}, - ) - _person2_team2 = _create_person( - team_id=team2.pk, - distinct_ids=["person2_team2"], - properties={"$some_prop": "else"}, - ) - # Create cohort in team 2 (but same project as team 1) - shared_cohort = Cohort.objects.create( - team=team2, - groups=[{"properties": [{"key": "$some_prop", "value": "something", "type": "person"}]}], - name="shared cohort", - ) - # Calculate cohort - shared_cohort.calculate_people_ch(pending_version=0) - - # Verify shared_cohort is now calculated for both teams - results_team1 = self._get_cohortpeople(shared_cohort, team_id=self.team.pk) - results_team2 = self._get_cohortpeople(shared_cohort, team_id=team2.pk) - - self.assertCountEqual([r[0] for r in results_team1], [person2_team1.uuid]) - self.assertCountEqual([r[0] for r in results_team2], [person1_team2.uuid]) diff --git a/ee/clickhouse/models/test/test_dead_letter_queue.py b/ee/clickhouse/models/test/test_dead_letter_queue.py deleted file mode 100644 index 220d7ada32..0000000000 --- a/ee/clickhouse/models/test/test_dead_letter_queue.py +++ /dev/null @@ -1,114 +0,0 @@ -import json -from datetime import datetime -from uuid import uuid4 - -from kafka import KafkaProducer - -from ee.clickhouse.models.test.utils.util import ( - delay_until_clickhouse_consumes_from_kafka, -) -from posthog.clickhouse.dead_letter_queue import ( - DEAD_LETTER_QUEUE_TABLE, - DEAD_LETTER_QUEUE_TABLE_MV_SQL, - INSERT_DEAD_LETTER_QUEUE_EVENT_SQL, - KAFKA_DEAD_LETTER_QUEUE_TABLE_SQL, -) -from posthog.client import sync_execute -from posthog.kafka_client.topics import KAFKA_DEAD_LETTER_QUEUE -from posthog.settings import KAFKA_HOSTS -from posthog.test.base import BaseTest, ClickhouseTestMixin - -TEST_EVENT_RAW_PAYLOAD = json.dumps({"event": "some event", "properties": {"distinct_id": 2, "token": "invalid token"}}) - - -def get_dlq_event(): - CREATED_AT = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f") - ERROR_TIMESTAMP = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f") - NOW = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f") - - return { - "id": str(uuid4()), - "event_uuid": str(uuid4()), - "event": "some event", - "properties": "{ a: 1 }", - "distinct_id": "some distinct id", - "team_id": 1, - "elements_chain": "", - "created_at": CREATED_AT, - "ip": "127.0.0.1", - "site_url": 
"https://myawesomewebsite.com", - "now": NOW, - "raw_payload": TEST_EVENT_RAW_PAYLOAD, - "error_timestamp": ERROR_TIMESTAMP, - "error_location": "plugin-server", - "error": "createPerson failed", - } - - -def convert_query_result_to_dlq_event_dicts(query_result): - events_returned = [] - - for read_dlq_event in query_result: - events_returned.append( - { - "id": str(read_dlq_event[0]), - "event_uuid": str(read_dlq_event[1]), - "event": str(read_dlq_event[2]), - "properties": str(read_dlq_event[3]), - "distinct_id": str(read_dlq_event[4]), - "team_id": int(read_dlq_event[5]), - "elements_chain": str(read_dlq_event[6]), - "created_at": read_dlq_event[7].strftime("%Y-%m-%d %H:%M:%S.%f"), - "ip": str(read_dlq_event[8]), - "site_url": str(read_dlq_event[9]), - "now": read_dlq_event[10].strftime("%Y-%m-%d %H:%M:%S.%f"), - "raw_payload": str(read_dlq_event[11]), - "error_timestamp": read_dlq_event[12].strftime("%Y-%m-%d %H:%M:%S.%f"), - "error_location": str(read_dlq_event[13]), - "error": str(read_dlq_event[14]), - } - ) - return events_returned - - -class TestDeadLetterQueue(ClickhouseTestMixin, BaseTest): - def setUp(self): - sync_execute(KAFKA_DEAD_LETTER_QUEUE_TABLE_SQL()) - sync_execute(DEAD_LETTER_QUEUE_TABLE_MV_SQL) - super().setUp() - - def tearDown(self): - sync_execute("DROP TABLE IF EXISTS events_dead_letter_queue_mv") - sync_execute("DROP TABLE IF EXISTS kafka_events_dead_letter_queue") - super().tearDown() - - def test_direct_table_insert(self): - inserted_dlq_event = get_dlq_event() - sync_execute(INSERT_DEAD_LETTER_QUEUE_EVENT_SQL, inserted_dlq_event) - query_result = sync_execute(f"SELECT * FROM {DEAD_LETTER_QUEUE_TABLE}") - events_returned = convert_query_result_to_dlq_event_dicts(query_result) - # TRICKY: because it's hard to truncate the dlq table, we just check if the event is in the table along with events from other tests - # Because each generated event is unique, this works - self.assertIn(inserted_dlq_event, events_returned) - - def test_kafka_insert(self): - row_count_before_insert = sync_execute(f"SELECT count(1) FROM {DEAD_LETTER_QUEUE_TABLE}")[0][0] - inserted_dlq_event = get_dlq_event() - - new_error = "cannot reach db to fetch team" - inserted_dlq_event["error"] = new_error - - kafka_producer = KafkaProducer(bootstrap_servers=KAFKA_HOSTS) - - kafka_producer.send( - topic=KAFKA_DEAD_LETTER_QUEUE, - value=json.dumps(inserted_dlq_event).encode("utf-8"), - ) - - delay_until_clickhouse_consumes_from_kafka(DEAD_LETTER_QUEUE_TABLE, row_count_before_insert + 1) - - query_result = sync_execute(f"SELECT * FROM {DEAD_LETTER_QUEUE_TABLE}") - events_returned = convert_query_result_to_dlq_event_dicts(query_result) - # TRICKY: because it's hard to truncate the dlq table, we just check if the event is in the table along with events from other tests - # Because each generated event is unique, this works - self.assertIn(inserted_dlq_event, events_returned) diff --git a/ee/clickhouse/models/test/test_filters.py b/ee/clickhouse/models/test/test_filters.py deleted file mode 100644 index 96cc887df4..0000000000 --- a/ee/clickhouse/models/test/test_filters.py +++ /dev/null @@ -1,1469 +0,0 @@ -import json -from typing import Optional - -from posthog.client import query_with_columns, sync_execute -from posthog.constants import FILTER_TEST_ACCOUNTS -from posthog.models import Element, Organization, Person, Team -from posthog.models.cohort import Cohort -from posthog.models.event.sql import GET_EVENTS_WITH_PROPERTIES -from posthog.models.event.util import ClickhouseEventSerializer -from 
posthog.models.filters import Filter -from posthog.models.filters.retention_filter import RetentionFilter -from posthog.models.filters.test.test_filter import TestFilter as PGTestFilters -from posthog.models.filters.test.test_filter import property_to_Q_test_factory -from posthog.models.property.util import parse_prop_grouped_clauses -from posthog.queries.util import PersonPropertiesMode -from posthog.test.base import ClickhouseTestMixin, _create_event, _create_person -from posthog.test.test_journeys import journeys_for - - -def _filter_events(filter: Filter, team: Team, order_by: Optional[str] = None): - prop_filters, prop_filter_params = parse_prop_grouped_clauses( - property_group=filter.property_groups, - team_id=team.pk, - hogql_context=filter.hogql_context, - ) - params = {"team_id": team.pk, **prop_filter_params} - - events = query_with_columns( - GET_EVENTS_WITH_PROPERTIES.format( - filters=prop_filters, - order_by="ORDER BY {}".format(order_by) if order_by else "", - ), - params, - ) - parsed_events = ClickhouseEventSerializer(events, many=True, context={"elements": None, "people": None}).data - return parsed_events - - -def _filter_persons(filter: Filter, team: Team): - prop_filters, prop_filter_params = parse_prop_grouped_clauses( - property_group=filter.property_groups, - team_id=team.pk, - person_properties_mode=PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN, - hogql_context=filter.hogql_context, - ) - # Note this query does not handle person rows changing over time - rows = sync_execute( - f"SELECT id, properties AS person_props FROM person WHERE team_id = %(team_id)s {prop_filters}", - {"team_id": team.pk, **prop_filter_params, **filter.hogql_context.values}, - ) - return [str(uuid) for uuid, _ in rows] - - -class TestFilters(PGTestFilters): - maxDiff = None - - def test_simplify_cohorts(self): - cohort = Cohort.objects.create( - team=self.team, - groups=[ - { - "properties": [ - { - "key": "email", - "operator": "icontains", - "value": ".com", - "type": "person", - } - ] - } - ], - ) - cohort.calculate_people_ch(pending_version=0) - - filter = Filter(data={"properties": [{"type": "cohort", "key": "id", "value": cohort.pk}]}) - filter_with_groups = Filter( - data={ - "properties": { - "type": "AND", - "values": [{"type": "cohort", "key": "id", "value": cohort.pk}], - } - } - ) - - self.assertEqual( - filter.simplify(self.team).properties_to_dict(), - { - "properties": { - "type": "AND", - "values": [ - { - "type": "person", - "key": "email", - "operator": "icontains", - "value": ".com", - } - ], - } - }, - ) - - self.assertEqual( - filter_with_groups.simplify(self.team).properties_to_dict(), - { - "properties": { - "type": "AND", - "values": [ - { - "type": "person", - "key": "email", - "operator": "icontains", - "value": ".com", - } - ], - } - }, - ) - - with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True): - self.assertEqual( - filter.simplify(self.team).properties_to_dict(), - { - "properties": { - "type": "AND", - "values": [ - { - "key": "id", - "value": cohort.pk, - "negation": False, - "type": "precalculated-cohort", - } - ], - } - }, - ) - - self.assertEqual( - filter_with_groups.simplify(self.team).properties_to_dict(), - { - "properties": { - "type": "AND", - "values": [ - { - "key": "id", - "negation": False, - "value": cohort.pk, - "type": "precalculated-cohort", - } - ], - } - }, - ) - - def test_simplify_static_cohort(self): - cohort = Cohort.objects.create(team=self.team, groups=[], is_static=True) - filter = Filter(data={"properties": [{"type": 
"cohort", "key": "id", "value": cohort.pk}]}) - - self.assertEqual( - filter.simplify(self.team).properties_to_dict(), - { - "properties": { - "type": "AND", - "values": [{"type": "static-cohort", "negation": False, "key": "id", "value": cohort.pk}], - } - }, - ) - - def test_simplify_hasdone_cohort(self): - cohort = Cohort.objects.create(team=self.team, groups=[{"event_id": "$pageview", "days": 1}]) - filter = Filter(data={"properties": [{"type": "cohort", "key": "id", "value": cohort.pk}]}) - - self.assertEqual( - filter.simplify(self.team).properties_to_dict(), - { - "properties": { - "type": "AND", - "values": [{"type": "cohort", "negation": False, "key": "id", "value": cohort.pk}], - } - }, - ) - - def test_simplify_multi_group_cohort(self): - cohort = Cohort.objects.create( - team=self.team, - groups=[ - {"properties": [{"key": "$some_prop", "value": "something", "type": "person"}]}, - {"properties": [{"key": "$another_prop", "value": "something", "type": "person"}]}, - ], - ) - filter = Filter(data={"properties": [{"type": "cohort", "key": "id", "value": cohort.pk}]}) - - self.assertEqual( - filter.simplify(self.team).properties_to_dict(), - { - "properties": { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "type": "AND", - "values": [ - { - "type": "person", - "key": "$some_prop", - "value": "something", - } - ], - }, - { - "type": "AND", - "values": [ - { - "type": "person", - "key": "$another_prop", - "value": "something", - } - ], - }, - ], - } - ], - } - }, - ) - - def test_recursive_cohort(self): - cohort = Cohort.objects.create( - team=self.team, - groups=[ - { - "properties": [ - { - "key": "email", - "operator": "icontains", - "value": ".com", - "type": "person", - } - ] - } - ], - ) - recursive_cohort = Cohort.objects.create( - team=self.team, - groups=[{"properties": [{"type": "cohort", "key": "id", "value": cohort.pk}]}], - ) - filter = Filter(data={"properties": [{"type": "cohort", "key": "id", "value": recursive_cohort.pk}]}) - - self.assertEqual( - filter.simplify(self.team).properties_to_dict(), - { - "properties": { - "type": "AND", - "values": [ - { - "key": "email", - "operator": "icontains", - "value": ".com", - "type": "person", - } - ], - } - }, - ) - - def test_simplify_cohorts_with_recursive_negation(self): - cohort = Cohort.objects.create( - team=self.team, - groups=[ - { - "properties": [ - { - "key": "email", - "operator": "icontains", - "value": ".com", - "type": "person", - } - ] - } - ], - ) - recursive_cohort = Cohort.objects.create( - team=self.team, - groups=[ - { - "properties": [ - {"key": "email", "value": "xyz", "type": "person"}, - { - "type": "cohort", - "key": "id", - "value": cohort.pk, - "negation": True, - }, - ] - } - ], - ) - filter = Filter( - data={ - "properties": [ - { - "type": "cohort", - "key": "id", - "value": recursive_cohort.pk, - "negation": True, - } - ] - } - ) - - self.assertEqual( - filter.simplify(self.team).properties_to_dict(), - { - "properties": { - "type": "AND", - "values": [ - { - "type": "cohort", - "key": "id", - "value": recursive_cohort.pk, - "negation": True, - } - ], - } - }, - ) - - def test_simplify_cohorts_with_simple_negation(self): - cohort = Cohort.objects.create( - team=self.team, - groups=[ - { - "properties": [ - { - "key": "email", - "operator": "icontains", - "value": ".com", - "type": "person", - } - ] - } - ], - ) - filter = Filter( - data={ - "properties": [ - { - "type": "cohort", - "key": "id", - "value": cohort.pk, - "negation": True, - } - ] - } - ) - - 
self.assertEqual( - filter.simplify(self.team).properties_to_dict(), - { - "properties": { - "type": "AND", - "values": [ - { - "type": "cohort", - "key": "id", - "value": cohort.pk, - "negation": True, - } - ], - } - }, - ) - - def test_simplify_no_such_cohort(self): - filter = Filter(data={"properties": [{"type": "cohort", "key": "id", "value": 555_555}]}) - - self.assertEqual( - filter.simplify(self.team).properties_to_dict(), - { - "properties": { - "type": "AND", - "values": [{"type": "cohort", "key": "id", "value": 555_555}], - } - }, - ) - - def test_simplify_entities(self): - cohort = Cohort.objects.create( - team=self.team, - groups=[ - { - "properties": [ - { - "key": "email", - "operator": "icontains", - "value": ".com", - "type": "person", - } - ] - } - ], - ) - filter = Filter( - data={ - "events": [ - { - "id": "$pageview", - "properties": [{"type": "cohort", "key": "id", "value": cohort.pk}], - } - ] - } - ) - - self.assertEqual( - filter.simplify(self.team).entities_to_dict(), - { - "events": [ - { - "type": "events", - "distinct_id_field": None, - "id": "$pageview", - "id_field": None, - "math": None, - "math_hogql": None, - "math_property": None, - "math_group_type_index": None, - "custom_name": None, - "order": None, - "name": "$pageview", - "properties": { - "type": "AND", - "values": [ - { - "key": "email", - "operator": "icontains", - "value": ".com", - "type": "person", - } - ], - }, - "table_name": None, - "timestamp_field": None, - } - ] - }, - ) - - def test_simplify_entities_with_group_math(self): - filter = Filter( - data={ - "events": [ - { - "id": "$pageview", - "math": "unique_group", - "math_group_type_index": 2, - } - ] - } - ) - - self.assertEqual( - filter.simplify(self.team).entities_to_dict(), - { - "events": [ - { - "type": "events", - "distinct_id_field": None, - "id": "$pageview", - "id_field": None, - "math": "unique_group", - "math_hogql": None, - "math_property": None, - "math_group_type_index": 2, - "custom_name": None, - "order": None, - "name": "$pageview", - "properties": { - "type": "AND", - "values": [ - { - "key": "$group_2", - "operator": "is_not", - "value": "", - "type": "event", - } - ], - }, - "table_name": None, - "timestamp_field": None, - } - ] - }, - ) - - def test_simplify_when_aggregating_by_group(self): - filter = RetentionFilter(data={"aggregation_group_type_index": 0}) - - self.assertEqual( - filter.simplify(self.team).properties_to_dict(), - { - "properties": { - "type": "AND", - "values": [ - { - "key": "$group_0", - "operator": "is_not", - "value": "", - "type": "event", - } - ], - } - }, - ) - - def test_simplify_funnel_entities_when_aggregating_by_group(self): - filter = Filter(data={"events": [{"id": "$pageview"}], "aggregation_group_type_index": 2}) - - self.assertEqual( - filter.simplify(self.team).properties_to_dict(), - { - "properties": { - "type": "AND", - "values": [ - { - "key": "$group_2", - "operator": "is_not", - "value": "", - "type": "event", - } - ], - } - }, - ) - - -class TestFiltering(ClickhouseTestMixin, property_to_Q_test_factory(_filter_persons, _create_person)): # type: ignore - def test_simple(self): - _create_event(team=self.team, distinct_id="test", event="$pageview") - _create_event( - team=self.team, - distinct_id="test", - event="$pageview", - properties={"$current_url": 1}, - ) # test for type incompatibility - _create_event( - team=self.team, - distinct_id="test", - event="$pageview", - properties={"$current_url": {"bla": "bla"}}, - ) # test for type incompatibility - _create_event( - 
team=self.team, - event="$pageview", - distinct_id="test", - properties={"$current_url": "https://whatever.com"}, - ) - filter = Filter(data={"properties": {"$current_url": "https://whatever.com"}}) - events = _filter_events(filter, self.team) - self.assertEqual(len(events), 1) - - def test_multiple_equality(self): - _create_event(team=self.team, distinct_id="test", event="$pageview") - _create_event( - team=self.team, - distinct_id="test", - event="$pageview", - properties={"$current_url": 1}, - ) # test for type incompatibility - _create_event( - team=self.team, - distinct_id="test", - event="$pageview", - properties={"$current_url": {"bla": "bla"}}, - ) # test for type incompatibility - _create_event( - team=self.team, - event="$pageview", - distinct_id="test", - properties={"$current_url": "https://whatever.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="test", - properties={"$current_url": "https://example.com"}, - ) - filter = Filter(data={"properties": {"$current_url": ["https://whatever.com", "https://example.com"]}}) - events = _filter_events(filter, self.team) - self.assertEqual(len(events), 2) - - def test_numerical(self): - event1_uuid = _create_event( - team=self.team, - distinct_id="test", - event="$pageview", - properties={"$a_number": 5}, - ) - event2_uuid = _create_event( - team=self.team, - event="$pageview", - distinct_id="test", - properties={"$a_number": 6}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="test", - properties={"$a_number": "rubbish"}, - ) - filter = Filter(data={"properties": {"$a_number__gt": 5}}) - events = _filter_events(filter, self.team) - self.assertEqual(events[0]["id"], event2_uuid) - - filter = Filter(data={"properties": {"$a_number": 5}}) - events = _filter_events(filter, self.team) - self.assertEqual(events[0]["id"], event1_uuid) - - filter = Filter(data={"properties": {"$a_number__lt": 6}}) - events = _filter_events(filter, self.team) - self.assertEqual(events[0]["id"], event1_uuid) - - def test_numerical_person_properties(self): - _create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"$a_number": 4}) - _create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"$a_number": 5}) - _create_person(team_id=self.team.pk, distinct_ids=["p3"], properties={"$a_number": 6}) - - filter = Filter( - data={ - "properties": [ - { - "type": "person", - "key": "$a_number", - "value": 4, - "operator": "gt", - } - ] - } - ) - self.assertEqual(len(_filter_persons(filter, self.team)), 2) - - filter = Filter(data={"properties": [{"type": "person", "key": "$a_number", "value": 5}]}) - self.assertEqual(len(_filter_persons(filter, self.team)), 1) - - filter = Filter( - data={ - "properties": [ - { - "type": "person", - "key": "$a_number", - "value": 6, - "operator": "lt", - } - ] - } - ) - self.assertEqual(len(_filter_persons(filter, self.team)), 2) - - def test_contains(self): - _create_event(team=self.team, distinct_id="test", event="$pageview") - event2_uuid = _create_event( - team=self.team, - event="$pageview", - distinct_id="test", - properties={"$current_url": "https://whatever.com"}, - ) - filter = Filter(data={"properties": {"$current_url__icontains": "whatever"}}) - events = _filter_events(filter, self.team) - self.assertEqual(events[0]["id"], event2_uuid) - - def test_regex(self): - event1_uuid = _create_event(team=self.team, distinct_id="test", event="$pageview") - event2_uuid = _create_event( - team=self.team, - event="$pageview", - distinct_id="test", - 
properties={"$current_url": "https://whatever.com"}, - ) - filter = Filter(data={"properties": {"$current_url__regex": r"\.com$"}}) - events = _filter_events(filter, self.team) - self.assertEqual(events[0]["id"], event2_uuid) - - filter = Filter(data={"properties": {"$current_url__not_regex": r"\.eee$"}}) - events = _filter_events(filter, self.team, order_by="timestamp") - self.assertEqual(events[0]["id"], event1_uuid) - self.assertEqual(events[1]["id"], event2_uuid) - - def test_invalid_regex(self): - _create_event(team=self.team, distinct_id="test", event="$pageview") - _create_event( - team=self.team, - event="$pageview", - distinct_id="test", - properties={"$current_url": "https://whatever.com"}, - ) - - filter = Filter(data={"properties": {"$current_url__regex": "?*"}}) - self.assertEqual(len(_filter_events(filter, self.team)), 0) - - filter = Filter(data={"properties": {"$current_url__not_regex": "?*"}}) - self.assertEqual(len(_filter_events(filter, self.team)), 0) - - def test_is_not(self): - event1_uuid = _create_event(team=self.team, distinct_id="test", event="$pageview") - event2_uuid = _create_event( - team=self.team, - event="$pageview", - distinct_id="test", - properties={"$current_url": "https://something.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="test", - properties={"$current_url": "https://whatever.com"}, - ) - filter = Filter(data={"properties": {"$current_url__is_not": "https://whatever.com"}}) - events = _filter_events(filter, self.team) - self.assertEqual( - sorted([events[0]["id"], events[1]["id"]]), - sorted([event1_uuid, event2_uuid]), - ) - self.assertEqual(len(events), 2) - - def test_does_not_contain(self): - event1_uuid = _create_event(team=self.team, event="$pageview", distinct_id="test") - event2_uuid = _create_event( - team=self.team, - event="$pageview", - distinct_id="test", - properties={"$current_url": "https://something.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="test", - properties={"$current_url": "https://whatever.com"}, - ) - event3_uuid = _create_event( - team=self.team, - event="$pageview", - distinct_id="test", - properties={"$current_url": None}, - ) - filter = Filter(data={"properties": {"$current_url__not_icontains": "whatever.com"}}) - events = _filter_events(filter, self.team) - self.assertCountEqual([event["id"] for event in events], [event1_uuid, event2_uuid, event3_uuid]) - self.assertEqual(len(events), 3) - - def test_multiple(self): - event2_uuid = _create_event( - team=self.team, - event="$pageview", - distinct_id="test", - properties={ - "$current_url": "https://something.com", - "another_key": "value", - }, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="test", - properties={"$current_url": "https://something.com"}, - ) - filter = Filter( - data={ - "properties": { - "$current_url__icontains": "something.com", - "another_key": "value", - } - } - ) - events = _filter_events(filter, self.team) - self.assertEqual(events[0]["id"], event2_uuid) - self.assertEqual(len(events), 1) - - def test_user_properties(self): - _create_person( - team_id=self.team.pk, - distinct_ids=["person1"], - properties={"group": "some group"}, - ) - _create_person( - team_id=self.team.pk, - distinct_ids=["person2"], - properties={"group": "another group"}, - ) - event2_uuid = _create_event( - team=self.team, - distinct_id="person1", - event="$pageview", - properties={ - "$current_url": "https://something.com", - "another_key": "value", - }, - ) - event_p2_uuid 
= _create_event( - team=self.team, - distinct_id="person2", - event="$pageview", - properties={"$current_url": "https://something.com"}, - ) - - # test for leakage - _, _, team2 = Organization.objects.bootstrap(None) - _create_person( - team_id=team2.pk, - distinct_ids=["person_team_2"], - properties={"group": "another group"}, - ) - _create_event( - team=team2, - distinct_id="person_team_2", - event="$pageview", - properties={ - "$current_url": "https://something.com", - "another_key": "value", - }, - ) - - filter = Filter(data={"properties": [{"key": "group", "value": "some group", "type": "person"}]}) - events = _filter_events(filter=filter, team=self.team, order_by=None) - self.assertEqual(len(events), 1) - self.assertEqual(events[0]["id"], event2_uuid) - - filter = Filter( - data={ - "properties": [ - { - "key": "group", - "operator": "is_not", - "value": "some group", - "type": "person", - } - ] - } - ) - events = _filter_events(filter=filter, team=self.team, order_by=None) - self.assertEqual(events[0]["id"], event_p2_uuid) - self.assertEqual(len(events), 1) - - def test_user_properties_numerical(self): - _create_person(team_id=self.team.pk, distinct_ids=["person1"], properties={"group": 1}) - _create_person(team_id=self.team.pk, distinct_ids=["person2"], properties={"group": 2}) - event2_uuid = _create_event( - team=self.team, - distinct_id="person1", - event="$pageview", - properties={ - "$current_url": "https://something.com", - "another_key": "value", - }, - ) - _create_event( - team=self.team, - distinct_id="person2", - event="$pageview", - properties={"$current_url": "https://something.com"}, - ) - filter = Filter( - data={ - "properties": [ - {"key": "group", "operator": "lt", "value": 2, "type": "person"}, - {"key": "group", "operator": "gt", "value": 0, "type": "person"}, - ] - } - ) - events = _filter_events(filter=filter, team=self.team, order_by=None) - self.assertEqual(events[0]["id"], event2_uuid) - self.assertEqual(len(events), 1) - - def test_boolean_filters(self): - _create_event(team=self.team, event="$pageview", distinct_id="test") - event2_uuid = _create_event( - team=self.team, - event="$pageview", - distinct_id="test", - properties={"is_first_user": True}, - ) - filter = Filter(data={"properties": [{"key": "is_first_user", "value": "true"}]}) - events = _filter_events(filter, self.team) - self.assertEqual(events[0]["id"], event2_uuid) - self.assertEqual(len(events), 1) - - def test_is_not_set_and_is_set(self): - event1_uuid = _create_event(team=self.team, event="$pageview", distinct_id="test") - event2_uuid = _create_event( - team=self.team, - event="$pageview", - distinct_id="test", - properties={"is_first_user": True}, - ) - filter = Filter( - data={ - "properties": [ - { - "key": "is_first_user", - "operator": "is_not_set", - "value": "is_not_set", - } - ] - } - ) - events = _filter_events(filter, self.team) - self.assertEqual(events[0]["id"], event1_uuid) - self.assertEqual(len(events), 1) - - filter = Filter(data={"properties": [{"key": "is_first_user", "operator": "is_set", "value": "is_set"}]}) - events = _filter_events(filter, self.team) - self.assertEqual(events[0]["id"], event2_uuid) - self.assertEqual(len(events), 1) - - def test_is_not_set_and_is_set_with_missing_value(self): - event1_uuid = _create_event(team=self.team, event="$pageview", distinct_id="test") - event2_uuid = _create_event( - team=self.team, - event="$pageview", - distinct_id="test", - properties={"is_first_user": True}, - ) - filter = Filter(data={"properties": [{"key": 
"is_first_user", "operator": "is_not_set"}]}) - events = _filter_events(filter, self.team) - self.assertEqual(events[0]["id"], event1_uuid) - self.assertEqual(len(events), 1) - - filter = Filter(data={"properties": [{"key": "is_first_user", "operator": "is_set"}]}) - events = _filter_events(filter, self.team) - self.assertEqual(events[0]["id"], event2_uuid) - self.assertEqual(len(events), 1) - - def test_true_false(self): - _create_event(team=self.team, distinct_id="test", event="$pageview") - event2_uuid = _create_event( - team=self.team, - event="$pageview", - distinct_id="test", - properties={"is_first": True}, - ) - filter = Filter(data={"properties": {"is_first": "true"}}) - events = _filter_events(filter, self.team) - self.assertEqual(events[0]["id"], event2_uuid) - - filter = Filter(data={"properties": {"is_first": ["true"]}}) - events = _filter_events(filter, self.team) - - self.assertEqual(events[0]["id"], event2_uuid) - - def test_is_not_true_false(self): - event_uuid = _create_event(team=self.team, distinct_id="test", event="$pageview") - _create_event( - team=self.team, - event="$pageview", - distinct_id="test", - properties={"is_first": True}, - ) - filter = Filter(data={"properties": [{"key": "is_first", "value": "true", "operator": "is_not"}]}) - events = _filter_events(filter, self.team) - self.assertEqual(events[0]["id"], event_uuid) - - def test_json_object(self): - _create_person( - team_id=self.team.pk, - distinct_ids=["person1"], - properties={"name": {"first_name": "Mary", "last_name": "Smith"}}, - ) - event1_uuid = _create_event( - team=self.team, - distinct_id="person1", - event="$pageview", - properties={"$current_url": "https://something.com"}, - ) - filter = Filter( - data={ - "properties": [ - { - "key": "name", - "value": json.dumps({"first_name": "Mary", "last_name": "Smith"}), - "type": "person", - } - ] - } - ) - events = _filter_events(filter=filter, team=self.team, order_by=None) - self.assertEqual(events[0]["id"], event1_uuid) - self.assertEqual(len(events), 1) - - def test_element_selectors(self): - _create_event( - team=self.team, - event="$autocapture", - distinct_id="distinct_id", - elements=[ - Element.objects.create(tag_name="a"), - Element.objects.create(tag_name="div"), - ], - ) - _create_event(team=self.team, event="$autocapture", distinct_id="distinct_id") - filter = Filter(data={"properties": [{"key": "selector", "value": "div > a", "type": "element"}]}) - events = _filter_events(filter=filter, team=self.team) - self.assertEqual(len(events), 1) - - def test_element_filter(self): - _create_event( - team=self.team, - event="$autocapture", - distinct_id="distinct_id", - elements=[ - Element.objects.create(tag_name="a", text="some text"), - Element.objects.create(tag_name="div"), - ], - ) - - _create_event( - team=self.team, - event="$autocapture", - distinct_id="distinct_id", - elements=[ - Element.objects.create(tag_name="a", text="some other text"), - Element.objects.create(tag_name="div"), - ], - ) - - _create_event(team=self.team, event="$autocapture", distinct_id="distinct_id") - filter = Filter( - data={ - "properties": [ - { - "key": "text", - "value": ["some text", "some other text"], - "type": "element", - } - ] - } - ) - events = _filter_events(filter=filter, team=self.team) - self.assertEqual(len(events), 2) - - filter2 = Filter(data={"properties": [{"key": "text", "value": "some text", "type": "element"}]}) - events_response_2 = _filter_events(filter=filter2, team=self.team) - self.assertEqual(len(events_response_2), 1) - - def 
test_filter_out_team_members(self): - _create_person( - team_id=self.team.pk, - distinct_ids=["team_member"], - properties={"email": "test@posthog.com"}, - ) - _create_person( - team_id=self.team.pk, - distinct_ids=["random_user"], - properties={"email": "test@gmail.com"}, - ) - self.team.test_account_filters = [ - { - "key": "email", - "value": "@posthog.com", - "operator": "not_icontains", - "type": "person", - } - ] - self.team.save() - _create_event(team=self.team, distinct_id="team_member", event="$pageview") - _create_event(team=self.team, distinct_id="random_user", event="$pageview") - filter = Filter( - data={FILTER_TEST_ACCOUNTS: True, "events": [{"id": "$pageview"}]}, - team=self.team, - ) - events = _filter_events(filter=filter, team=self.team) - self.assertEqual(len(events), 1) - - def test_filter_out_team_members_with_grouped_properties(self): - _create_person( - team_id=self.team.pk, - distinct_ids=["person1"], - properties={"email": "test1@gmail.com", "name": "test", "age": "10"}, - ) - _create_person( - team_id=self.team.pk, - distinct_ids=["person2"], - properties={"email": "test2@gmail.com", "name": "test", "age": "20"}, - ) - _create_person( - team_id=self.team.pk, - distinct_ids=["person3"], - properties={"email": "test3@gmail.com", "name": "test", "age": "30"}, - ) - _create_person( - team_id=self.team.pk, - distinct_ids=["person4"], - properties={"email": "test4@gmail.com", "name": "test", "age": "40"}, - ) - _create_person( - team_id=self.team.pk, - distinct_ids=["person5"], - properties={"email": "test@posthog.com", "name": "test", "age": "50"}, - ) - - self.team.test_account_filters = [ - { - "key": "email", - "value": "@posthog.com", - "operator": "not_icontains", - "type": "person", - } - ] - self.team.save() - - journeys_for( - team=self.team, - create_people=False, - events_by_person={ - "person1": [ - { - "event": "$pageview", - "properties": { - "key": "val", - "$browser": "Safari", - "$browser_version": 14, - }, - } - ], - "person2": [ - { - "event": "$pageview", - "properties": { - "key": "val", - "$browser": "Safari", - "$browser_version": 14, - }, - } - ], - "person3": [ - { - "event": "$pageview", - "properties": { - "key": "val", - "$browser": "Safari", - "$browser_version": 14, - }, - } - ], - "person4": [ - { - "event": "$pageview", - "properties": { - "key": "val", - "$browser": "Safari", - "$browser_version": 14, - }, - } - ], - "person5": [ - { - "event": "$pageview", - "properties": { - "key": "val", - "$browser": "Safari", - "$browser_version": 14, - }, - } - ], - }, - ) - - filter = Filter( - data={ - FILTER_TEST_ACCOUNTS: True, - "events": [{"id": "$pageview"}], - "properties": { - "type": "OR", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "age", - "value": "10", - "operator": "exact", - "type": "person", - }, - { - "key": "age", - "value": "20", - "operator": "exact", - "type": "person", - }, - # choose person 1 and 2 - ], - }, - { - "type": "AND", - "values": [ - { - "key": "$browser", - "value": "Safari", - "operator": "exact", - "type": "event", - }, - { - "key": "age", - "value": "50", - "operator": "exact", - "type": "person", - }, - # choose person 5 - ], - }, - ], - }, - }, - team=self.team, - ) - events = _filter_events(filter=filter, team=self.team) - # test account filters delete person 5, so only 1 and 2 remain - self.assertEqual(len(events), 2) - - def test_person_cohort_properties(self): - person1_distinct_id = "person1" - Person.objects.create( - team=self.team, - distinct_ids=[person1_distinct_id], - 
properties={"$some_prop": "something"}, - ) - - cohort1 = Cohort.objects.create( - team=self.team, - groups=[{"properties": [{"type": "person", "key": "$some_prop", "value": "something"}]}], - name="cohort1", - ) - - person2_distinct_id = "person2" - Person.objects.create( - team=self.team, - distinct_ids=[person2_distinct_id], - properties={"$some_prop": "different"}, - ) - cohort2 = Cohort.objects.create( - team=self.team, - groups=[ - { - "properties": [ - { - "type": "person", - "key": "$some_prop", - "value": "something", - "operator": "is_not", - } - ] - } - ], - name="cohort2", - ) - - filter = Filter( - data={"properties": [{"key": "id", "value": cohort1.pk, "type": "cohort"}]}, - team=self.team, - ) - - prop_clause, prop_clause_params = parse_prop_grouped_clauses( - property_group=filter.property_groups, - has_person_id_joined=False, - team_id=self.team.pk, - hogql_context=filter.hogql_context, - ) - query = """ - SELECT distinct_id FROM person_distinct_id2 WHERE team_id = %(team_id)s {prop_clause} - """.format(prop_clause=prop_clause) - # get distinct_id column of result - result = sync_execute( - query, - { - "team_id": self.team.pk, - **prop_clause_params, - **filter.hogql_context.values, - }, - )[0][0] - self.assertEqual(result, person1_distinct_id) - - # test cohort2 with negation - filter = Filter( - data={"properties": [{"key": "id", "value": cohort2.pk, "type": "cohort"}]}, - team=self.team, - ) - prop_clause, prop_clause_params = parse_prop_grouped_clauses( - property_group=filter.property_groups, - has_person_id_joined=False, - team_id=self.team.pk, - hogql_context=filter.hogql_context, - ) - query = """ - SELECT distinct_id FROM person_distinct_id2 WHERE team_id = %(team_id)s {prop_clause} - """.format(prop_clause=prop_clause) - # get distinct_id column of result - result = sync_execute( - query, - { - "team_id": self.team.pk, - **prop_clause_params, - **filter.hogql_context.values, - }, - )[0][0] - - self.assertEqual(result, person2_distinct_id) - - def test_simplify_nested(self): - filter = Filter( - data={ - "properties": { - "type": "OR", - "values": [ - { - "type": "OR", - "values": [ - { - "type": "AND", - "values": [ - { - "type": "person", - "key": "email", - "operator": "icontains", - "value": ".com", - } - ], - } - ], - }, - { - "type": "AND", - "values": [ - { - "type": "person", - "key": "email", - "operator": "icontains", - "value": "arg2", - }, - { - "type": "person", - "key": "email", - "operator": "icontains", - "value": "arg3", - }, - ], - }, - ], - } - } - ) - - # Can't remove the single prop groups if the parent group has multiple. 
The second list of conditions becomes property groups - # because of simplify now will return prop groups by default to ensure type consistency - self.assertEqual( - filter.simplify(self.team).properties_to_dict(), - { - "properties": { - "type": "OR", - "values": [ - { - "type": "OR", - "values": [ - { - "type": "person", - "key": "email", - "operator": "icontains", - "value": ".com", - } - ], - }, - { - "type": "AND", - "values": [ - { - "type": "AND", - "values": [ - { - "type": "person", - "key": "email", - "operator": "icontains", - "value": "arg2", - } - ], - }, - { - "type": "AND", - "values": [ - { - "type": "person", - "key": "email", - "operator": "icontains", - "value": "arg3", - } - ], - }, - ], - }, - ], - } - }, - ) - - filter = Filter( - data={ - "properties": { - "type": "OR", - "values": [ - { - "type": "OR", - "values": [ - { - "type": "AND", - "values": [ - { - "type": "person", - "key": "email", - "operator": "icontains", - "value": ".com", - } - ], - } - ], - }, - { - "type": "AND", - "values": [ - { - "type": "person", - "key": "email", - "operator": "icontains", - "value": "arg2", - } - ], - }, - ], - } - } - ) - - self.assertEqual( - filter.simplify(self.team).properties_to_dict(), - { - "properties": { - "type": "OR", - "values": [ - { - "type": "OR", - "values": [ - { - "type": "person", - "key": "email", - "operator": "icontains", - "value": ".com", - } - ], - }, - { - "type": "AND", - "values": [ - { - "type": "person", - "key": "email", - "operator": "icontains", - "value": "arg2", - } - ], - }, - ], - } - }, - ) diff --git a/ee/clickhouse/models/test/test_property.py b/ee/clickhouse/models/test/test_property.py deleted file mode 100644 index fd1791438c..0000000000 --- a/ee/clickhouse/models/test/test_property.py +++ /dev/null @@ -1,2012 +0,0 @@ -from datetime import datetime -from typing import Literal, Union, cast -from uuid import UUID - -import pytest -from freezegun.api import freeze_time -from rest_framework.exceptions import ValidationError - -from ee.clickhouse.materialized_columns.columns import materialize -from posthog.client import sync_execute -from posthog.constants import PropertyOperatorType -from posthog.models.cohort import Cohort -from posthog.models.element import Element -from posthog.models.filters import Filter -from posthog.models.instance_setting import ( - get_instance_setting, -) -from posthog.models.organization import Organization -from posthog.models.property import Property, TableWithProperties -from posthog.models.property.util import ( - PropertyGroup, - get_property_string_expr, - get_single_or_multi_property_string_expr, - parse_prop_grouped_clauses, - prop_filter_json_extract, -) -from posthog.models.team import Team -from posthog.queries.person_distinct_id_query import get_team_distinct_ids_query -from posthog.queries.person_query import PersonQuery -from posthog.queries.property_optimizer import PropertyOptimizer -from posthog.queries.util import PersonPropertiesMode -from posthog.test.base import ( - BaseTest, - ClickhouseTestMixin, - _create_event, - _create_person, - cleanup_materialized_columns, - snapshot_clickhouse_queries, -) - - -class TestPropFormat(ClickhouseTestMixin, BaseTest): - CLASS_DATA_LEVEL_SETUP = False - - def _run_query(self, filter: Filter, **kwargs) -> list: - query, params = parse_prop_grouped_clauses( - property_group=filter.property_groups, - allow_denormalized_props=True, - team_id=self.team.pk, - hogql_context=filter.hogql_context, - **kwargs, - ) - final_query = "SELECT uuid FROM events WHERE 
team_id = %(team_id)s {}".format(query) - return sync_execute( - final_query, - {**params, **filter.hogql_context.values, "team_id": self.team.pk}, - ) - - def test_prop_person(self): - _create_person( - distinct_ids=["some_other_id"], - team_id=self.team.pk, - properties={"email": "another@posthog.com"}, - ) - - _create_person( - distinct_ids=["some_id"], - team_id=self.team.pk, - properties={"email": "test@posthog.com"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="some_id", - properties={"attr": "some_val"}, - ) - - filter = Filter(data={"properties": [{"key": "email", "value": "test@posthog.com", "type": "person"}]}) - self.assertEqual(len(self._run_query(filter)), 1) - - def test_prop_event(self): - _create_event( - event="$pageview", - team=self.team, - distinct_id="whatever", - properties={"attr": "some_other_val"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="whatever", - properties={"attr": "some_val"}, - ) - - filter_exact = Filter(data={"properties": [{"key": "attr", "value": "some_val"}]}) - self.assertEqual(len(self._run_query(filter_exact)), 1) - - filter_regex = Filter(data={"properties": [{"key": "attr", "value": "some_.+_val", "operator": "regex"}]}) - self.assertEqual(len(self._run_query(filter_regex)), 1) - - filter_icontains = Filter(data={"properties": [{"key": "attr", "value": "Some_Val", "operator": "icontains"}]}) - self.assertEqual(len(self._run_query(filter_icontains)), 1) - - filter_not_icontains = Filter( - data={"properties": [{"key": "attr", "value": "other", "operator": "not_icontains"}]} - ) - self.assertEqual(len(self._run_query(filter_not_icontains)), 1) - - def test_prop_element(self): - _create_event( - event="$autocapture", - team=self.team, - distinct_id="whatever", - properties={"attr": "some_other_val"}, - elements=[ - Element( - tag_name="a", - href="/a-url", - attr_class=["small"], - text="bla bla", - nth_child=1, - nth_of_type=0, - ), - Element( - tag_name="button", - attr_class=["btn", "btn-primary"], - nth_child=0, - nth_of_type=0, - ), - Element(tag_name="div", nth_child=0, nth_of_type=0), - Element(tag_name="label", nth_child=0, nth_of_type=0, attr_id="nested"), - ], - ) - _create_event( - event="$autocapture", - team=self.team, - distinct_id="whatever", - properties={"attr": "some_val"}, - elements=[ - Element( - tag_name="a", - href="/a-url", - attr_class=["small"], - text='bla"bla', - attributes={}, - nth_child=1, - nth_of_type=0, - ), - Element( - tag_name="button", - attr_class=["btn", "btn-secondary"], - nth_child=0, - nth_of_type=0, - ), - Element(tag_name="div", nth_child=0, nth_of_type=0), - Element(tag_name="img", nth_child=0, nth_of_type=0, attr_id="nested"), - ], - ) - _create_event( - event="$autocapture", - team=self.team, - distinct_id="whatever", - elements=[ - Element(tag_name="a", href="/789", nth_child=0, nth_of_type=0), - Element( - tag_name="button", - attr_class=["btn", "btn-tertiary"], - nth_child=0, - nth_of_type=0, - ), - ], - ) - - # selector - - filter = Filter( - data={ - "properties": [ - { - "key": "selector", - "value": [".btn"], - "operator": "exact", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter)), 3) - - filter = Filter( - data={ - "properties": [ - { - "key": "selector", - "value": ".btn", - "operator": "exact", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter)), 3) - - filter = Filter( - data={ - "properties": [ - { - "key": "selector", - "value": [".btn-primary"], - 
"operator": "exact", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter)), 1) - - filter = Filter( - data={ - "properties": [ - { - "key": "selector", - "value": [".btn-secondary"], - "operator": "exact", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter)), 1) - - filter = Filter( - data={ - "properties": [ - { - "key": "selector", - "value": [".btn-primary", ".btn-secondary"], - "operator": "exact", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter)), 2) - - filter_selector_exact_empty = Filter( - data={ - "properties": [ - { - "key": "selector", - "value": [], - "operator": "exact", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter_selector_exact_empty)), 0) - - filter_selector_is_not_empty = Filter( - data={ - "properties": [ - { - "key": "selector", - "value": [], - "operator": "is_not", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter_selector_is_not_empty)), 3) - - # tag_name - - filter = Filter( - data={ - "properties": [ - { - "key": "tag_name", - "value": ["div"], - "operator": "exact", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter)), 2) - - filter = Filter( - data={ - "properties": [ - { - "key": "tag_name", - "value": "div", - "operator": "exact", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter)), 2) - - filter = Filter( - data={ - "properties": [ - { - "key": "tag_name", - "value": ["img"], - "operator": "exact", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter)), 1) - - filter = Filter( - data={ - "properties": [ - { - "key": "tag_name", - "value": ["label"], - "operator": "exact", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter)), 1) - - filter = Filter( - data={ - "properties": [ - { - "key": "tag_name", - "value": ["img", "label"], - "operator": "exact", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter)), 2) - - # href/text - - filter_href_exact = Filter( - data={ - "properties": [ - { - "key": "href", - "value": ["/a-url"], - "operator": "exact", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter_href_exact)), 2) - - filter_href_exact_double = Filter( - data={ - "properties": [ - { - "key": "href", - "value": ["/a-url", "/789"], - "operator": "exact", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter_href_exact_double)), 3) - - filter_href_exact_empty = Filter( - data={"properties": [{"key": "href", "value": [], "operator": "exact", "type": "element"}]} - ) - self.assertEqual(len(self._run_query(filter_href_exact_empty)), 0) - - filter_href_is_not = Filter( - data={ - "properties": [ - { - "key": "href", - "value": ["/a-url"], - "operator": "is_not", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter_href_is_not)), 1) - - filter_href_is_not_double = Filter( - data={ - "properties": [ - { - "key": "href", - "value": ["/a-url", "/789"], - "operator": "is_not", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter_href_is_not_double)), 0) - - filter_href_is_not_empty = Filter( - data={ - "properties": [ - { - "key": "href", - "value": [], - "operator": "is_not", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter_href_is_not_empty)), 3) - - filter_href_exact_with_tag_name_is_not = Filter( 
- data={ - "properties": [ - {"key": "href", "value": ["/a-url"], "type": "element"}, - { - "key": "tag_name", - "value": ["marquee"], - "operator": "is_not", - "type": "element", - }, - ] - } - ) - self.assertEqual(len(self._run_query(filter_href_exact_with_tag_name_is_not)), 2) - - filter_href_icontains = Filter( - data={ - "properties": [ - { - "key": "href", - "value": ["UrL"], - "operator": "icontains", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter_href_icontains)), 2) - - filter_href_regex = Filter( - data={ - "properties": [ - { - "key": "href", - "value": "/a-.+", - "operator": "regex", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter_href_regex)), 2) - - filter_href_not_regex = Filter( - data={ - "properties": [ - { - "key": "href", - "value": r"/\d+", - "operator": "not_regex", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter_href_not_regex)), 2) - - filter_text_icontains_with_doublequote = Filter( - data={ - "properties": [ - { - "key": "text", - "value": 'bla"bla', - "operator": "icontains", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter_text_icontains_with_doublequote)), 1) - - filter_text_is_set = Filter( - data={ - "properties": [ - { - "key": "text", - "value": "is_set", - "operator": "is_set", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter_text_is_set)), 2) - - filter_text_is_not_set = Filter( - data={ - "properties": [ - { - "key": "text", - "value": "is_not_set", - "operator": "is_not_set", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter_text_is_not_set)), 1) - - def test_prop_element_with_space(self): - _create_event( - event="$autocapture", - team=self.team, - distinct_id="whatever", - elements=[ - Element(tag_name="a", href="/789", nth_child=0, nth_of_type=0), - Element( - tag_name="button", - attr_class=["btn space", "btn-tertiary"], - nth_child=0, - nth_of_type=0, - ), - ], - ) - - # selector - - filter = Filter( - data={ - "properties": [ - { - "key": "selector", - "value": ["button"], - "operator": "exact", - "type": "element", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter)), 1) - - def test_prop_ints_saved_as_strings(self): - _create_event( - event="$pageview", - team=self.team, - distinct_id="whatever", - properties={"test_prop": "0"}, - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id="whatever", - properties={"test_prop": "2"}, - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id="whatever", - properties={"test_prop": 2}, - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id="whatever", - properties={"test_prop": "string"}, - ) - filter = Filter(data={"properties": [{"key": "test_prop", "value": "2"}]}) - self.assertEqual(len(self._run_query(filter)), 2) - - filter = Filter(data={"properties": [{"key": "test_prop", "value": 2}]}) - self.assertEqual(len(self._run_query(filter)), 2) - - # value passed as string - filter = Filter(data={"properties": [{"key": "test_prop", "value": "1", "operator": "gt"}]}) - self.assertEqual(len(self._run_query(filter)), 2) - filter = Filter(data={"properties": [{"key": "test_prop", "value": "3", "operator": "lt"}]}) - self.assertEqual(len(self._run_query(filter)), 3) - - # value passed as int - filter = Filter(data={"properties": [{"key": "test_prop", "value": 1, "operator": "gt"}]}) - self.assertEqual(len(self._run_query(filter)), 2) - - 
filter = Filter(data={"properties": [{"key": "test_prop", "value": 3, "operator": "lt"}]}) - self.assertEqual(len(self._run_query(filter)), 3) - - def test_prop_decimals(self): - _create_event( - event="$pageview", - team=self.team, - distinct_id="whatever", - properties={"test_prop": 1.4}, - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id="whatever", - properties={"test_prop": 1.3}, - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id="whatever", - properties={"test_prop": 2}, - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id="whatever", - properties={"test_prop": 2.5}, - ) - - filter = Filter(data={"properties": [{"key": "test_prop", "value": 1.5}]}) - self.assertEqual(len(self._run_query(filter)), 0) - - filter = Filter(data={"properties": [{"key": "test_prop", "value": 1.2, "operator": "gt"}]}) - self.assertEqual(len(self._run_query(filter)), 4) - - filter = Filter(data={"properties": [{"key": "test_prop", "value": "1.2", "operator": "gt"}]}) - self.assertEqual(len(self._run_query(filter)), 4) - - filter = Filter(data={"properties": [{"key": "test_prop", "value": 2.3, "operator": "lt"}]}) - self.assertEqual(len(self._run_query(filter)), 3) - - @snapshot_clickhouse_queries - def test_parse_groups(self): - _create_event( - event="$pageview", - team=self.team, - distinct_id="some_id", - properties={"attr_1": "val_1", "attr_2": "val_2"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="some_id", - properties={"attr_1": "val_2"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="some_other_id", - properties={"attr_1": "val_3"}, - ) - - filter = Filter( - data={ - "properties": { - "type": "OR", - "values": [ - { - "type": "AND", - "values": [ - {"key": "attr_1", "value": "val_1"}, - {"key": "attr_2", "value": "val_2"}, - ], - }, - {"type": "OR", "values": [{"key": "attr_1", "value": "val_2"}]}, - ], - } - } - ) - - self.assertEqual(len(self._run_query(filter)), 2) - - def test_parse_groups_invalid_type(self): - filter = Filter( - data={ - "properties": { - "type": "OR", - "values": [ - { - "type": "AND", - "values": [ - {"key": "attr", "value": "val_1"}, - {"key": "attr_2", "value": "val_2"}, - ], - }, - {"type": "XOR", "values": [{"key": "attr", "value": "val_2"}]}, - ], - } - } - ) - with self.assertRaises(ValidationError): - self._run_query(filter) - - @snapshot_clickhouse_queries - def test_parse_groups_persons(self): - _create_person( - distinct_ids=["some_id"], - team_id=self.team.pk, - properties={"email": "1@posthog.com"}, - ) - - _create_person( - distinct_ids=["some_other_id"], - team_id=self.team.pk, - properties={"email": "2@posthog.com"}, - ) - _create_person( - distinct_ids=["some_other_random_id"], - team_id=self.team.pk, - properties={"email": "X@posthog.com"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="some_id", - properties={"attr": "val_1"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="some_other_id", - properties={"attr": "val_3"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="some_other_random_id", - properties={"attr": "val_3"}, - ) - - filter = Filter( - data={ - "properties": { - "type": "OR", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "email", - "type": "person", - "value": "1@posthog.com", - } - ], - }, - { - "type": "OR", - "values": [ - { - "key": "email", - "type": "person", - "value": "2@posthog.com", - } - ], - }, - ], - } - 
} - ) - - self.assertEqual(len(self._run_query(filter)), 2) - - -class TestPropDenormalized(ClickhouseTestMixin, BaseTest): - CLASS_DATA_LEVEL_SETUP = False - - def _run_query(self, filter: Filter, join_person_tables=False) -> list: - outer_properties = PropertyOptimizer().parse_property_groups(filter.property_groups).outer - query, params = parse_prop_grouped_clauses( - team_id=self.team.pk, - property_group=outer_properties, - allow_denormalized_props=True, - person_properties_mode=PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN, - hogql_context=filter.hogql_context, - ) - joins = "" - if join_person_tables: - person_query = PersonQuery(filter, self.team.pk) - person_subquery, person_join_params = person_query.get_query() - joins = f""" - INNER JOIN ({get_team_distinct_ids_query(self.team.pk)}) AS pdi ON events.distinct_id = pdi.distinct_id - INNER JOIN ({person_subquery}) person ON pdi.person_id = person.id - """ - params.update(person_join_params) - - final_query = f"SELECT uuid FROM events {joins} WHERE team_id = %(team_id)s {query}" - # Make sure we don't accidentally use json on the properties field - self.assertNotIn("json", final_query.lower()) - return sync_execute( - final_query, - {**params, **filter.hogql_context.values, "team_id": self.team.pk}, - ) - - def test_prop_event_denormalized(self): - _create_event( - event="$pageview", - team=self.team, - distinct_id="whatever", - properties={"test_prop": "some_other_val"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="whatever", - properties={"test_prop": "some_val"}, - ) - - materialize("events", "test_prop") - materialize("events", "something_else") - - filter = Filter(data={"properties": [{"key": "test_prop", "value": "some_val"}]}) - self.assertEqual(len(self._run_query(filter)), 1) - - filter = Filter(data={"properties": [{"key": "test_prop", "value": "some_val", "operator": "is_not"}]}) - self.assertEqual(len(self._run_query(filter)), 1) - - filter = Filter(data={"properties": [{"key": "test_prop", "value": "some_val", "operator": "is_set"}]}) - self.assertEqual(len(self._run_query(filter)), 2) - - filter = Filter(data={"properties": [{"key": "test_prop", "value": "some_val", "operator": "is_not_set"}]}) - self.assertEqual(len(self._run_query(filter)), 0) - - filter = Filter(data={"properties": [{"key": "test_prop", "value": "_other_", "operator": "icontains"}]}) - self.assertEqual(len(self._run_query(filter)), 1) - - filter = Filter( - data={ - "properties": [ - { - "key": "test_prop", - "value": "_other_", - "operator": "not_icontains", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter)), 1) - - def test_prop_person_denormalized(self): - _create_person( - distinct_ids=["some_id"], - team_id=self.team.pk, - properties={"email": "test@posthog.com"}, - ) - _create_event(event="$pageview", team=self.team, distinct_id="some_id") - - materialize("person", "email") - - filter = Filter( - data={ - "properties": [ - { - "key": "email", - "type": "person", - "value": "posthog", - "operator": "icontains", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter, join_person_tables=True)), 1) - - filter = Filter( - data={ - "properties": [ - { - "key": "email", - "type": "person", - "value": "posthog", - "operator": "not_icontains", - } - ] - } - ) - self.assertEqual(len(self._run_query(filter, join_person_tables=True)), 0) - - def test_prop_person_groups_denormalized(self): - _filter = { - "properties": { - "type": "OR", - "values": [ - { - "type": "OR", - "values": [ - { - "key": 
"event_prop2", - "value": ["foo2", "bar2"], - "type": "event", - "operator": None, - }, - { - "key": "person_prop2", - "value": "efg2", - "type": "person", - "operator": None, - }, - ], - }, - { - "type": "AND", - "values": [ - { - "key": "event_prop", - "value": ["foo", "bar"], - "type": "event", - "operator": None, - }, - { - "key": "person_prop", - "value": "efg", - "type": "person", - "operator": None, - }, - ], - }, - ], - } - } - - filter = Filter(data=_filter) - - _create_person(distinct_ids=["some_id_1"], team_id=self.team.pk, properties={}) - _create_event( - event="$pageview", - team=self.team, - distinct_id="some_id_1", - properties={"event_prop2": "foo2"}, - ) - - _create_person( - distinct_ids=["some_id_2"], - team_id=self.team.pk, - properties={"person_prop2": "efg2"}, - ) - _create_event(event="$pageview", team=self.team, distinct_id="some_id_2") - - _create_person( - distinct_ids=["some_id_3"], - team_id=self.team.pk, - properties={"person_prop": "efg"}, - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id="some_id_3", - properties={"event_prop": "foo"}, - ) - - materialize("events", "event_prop") - materialize("events", "event_prop2") - materialize("person", "person_prop") - materialize("person", "person_prop2") - self.assertEqual(len(self._run_query(filter, join_person_tables=True)), 3) - - def test_prop_event_denormalized_ints(self): - _create_event( - event="$pageview", - team=self.team, - distinct_id="whatever", - properties={"test_prop": 0}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="whatever", - properties={"test_prop": 2}, - ) - - materialize("events", "test_prop") - materialize("events", "something_else") - - filter = Filter(data={"properties": [{"key": "test_prop", "value": 1, "operator": "gt"}]}) - self.assertEqual(len(self._run_query(filter)), 1) - - filter = Filter(data={"properties": [{"key": "test_prop", "value": 1, "operator": "lt"}]}) - self.assertEqual(len(self._run_query(filter)), 1) - - filter = Filter(data={"properties": [{"key": "test_prop", "value": 0}]}) - self.assertEqual(len(self._run_query(filter)), 1) - - def test_get_property_string_expr(self): - string_expr = get_property_string_expr("events", "some_non_mat_prop", "'some_non_mat_prop'", "properties") - self.assertEqual( - string_expr, - ( - "replaceRegexpAll(JSONExtractRaw(properties, 'some_non_mat_prop'), '^\"|\"$', '')", - False, - ), - ) - - string_expr = get_property_string_expr( - "events", - "some_non_mat_prop", - "'some_non_mat_prop'", - "properties", - table_alias="e", - ) - self.assertEqual( - string_expr, - ( - "replaceRegexpAll(JSONExtractRaw(e.properties, 'some_non_mat_prop'), '^\"|\"$', '')", - False, - ), - ) - - materialize("events", "some_mat_prop") - string_expr = get_property_string_expr("events", "some_mat_prop", "'some_mat_prop'", "properties") - self.assertEqual(string_expr, ('"mat_some_mat_prop"', True)) - - string_expr = get_property_string_expr( - "events", "some_mat_prop", "'some_mat_prop'", "properties", table_alias="e" - ) - self.assertEqual(string_expr, ('e."mat_some_mat_prop"', True)) - - materialize("events", "some_mat_prop2", table_column="person_properties") - materialize("events", "some_mat_prop3", table_column="group2_properties") - string_expr = get_property_string_expr( - "events", - "some_mat_prop2", - "x", - "properties", - materialised_table_column="person_properties", - ) - self.assertEqual(string_expr, ('"mat_pp_some_mat_prop2"', True)) - - -@pytest.mark.django_db -def 
test_parse_prop_clauses_defaults(snapshot): - filter = Filter( - data={ - "properties": [ - {"key": "event_prop", "value": "value"}, - { - "key": "email", - "type": "person", - "value": "posthog", - "operator": "icontains", - }, - ] - } - ) - - assert ( - parse_prop_grouped_clauses( - property_group=filter.property_groups, - allow_denormalized_props=False, - team_id=1, - hogql_context=filter.hogql_context, - ) - == snapshot - ) - assert ( - parse_prop_grouped_clauses( - property_group=filter.property_groups, - person_properties_mode=PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN, - allow_denormalized_props=False, - team_id=1, - hogql_context=filter.hogql_context, - ) - == snapshot - ) - assert ( - parse_prop_grouped_clauses( - team_id=1, - property_group=filter.property_groups, - person_properties_mode=PersonPropertiesMode.DIRECT, - allow_denormalized_props=False, - hogql_context=filter.hogql_context, - ) - == snapshot - ) - - -@pytest.mark.django_db -def test_parse_prop_clauses_precalculated_cohort(snapshot): - Cohort.objects.filter(pk=42).delete() - org = Organization.objects.create(name="other org") - - team = Team.objects.create(organization=org) - # force pk for snapshot consistency - cohort = Cohort.objects.create(pk=42, team=team, groups=[{"event_id": "$pageview", "days": 7}], name="cohort") - - filter = Filter( - data={"properties": [{"key": "id", "value": cohort.pk, "type": "precalculated-cohort"}]}, - team=team, - ) - - assert ( - parse_prop_grouped_clauses( - team_id=1, - property_group=filter.property_groups, - person_properties_mode=PersonPropertiesMode.USING_SUBQUERY, - allow_denormalized_props=False, - person_id_joined_alias="pdi.person_id", - hogql_context=filter.hogql_context, - ) - == snapshot - ) - - -# Regression test for: https://github.com/PostHog/posthog/pull/9283 -@pytest.mark.django_db -def test_parse_prop_clauses_funnel_step_element_prepend_regression(snapshot): - filter = Filter( - data={ - "properties": [ - { - "key": "text", - "type": "element", - "value": "Insights1", - "operator": "exact", - } - ] - } - ) - - assert ( - parse_prop_grouped_clauses( - property_group=filter.property_groups, - allow_denormalized_props=False, - team_id=1, - prepend="PREPEND", - hogql_context=filter.hogql_context, - ) - == snapshot - ) - - -@pytest.mark.django_db -def test_parse_groups_persons_edge_case_with_single_filter(snapshot): - filter = Filter( - data={ - "properties": { - "type": "OR", - "values": [{"key": "email", "type": "person", "value": "1@posthog.com"}], - } - } - ) - assert ( - parse_prop_grouped_clauses( - team_id=1, - property_group=filter.property_groups, - person_properties_mode=PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN, - allow_denormalized_props=True, - hogql_context=filter.hogql_context, - ) - == snapshot - ) - - -TEST_BREAKDOWN_PROCESSING = [ - ( - "$browser", - "events", - "prop", - "properties", - ( - "replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_1)s), '^\"|\"$', '') AS prop", - {"breakdown_param_1": "$browser"}, - ), - ), - ( - ["$browser"], - "events", - "value", - "properties", - ( - "array(replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_1)s), '^\"|\"$', '')) AS value", - {"breakdown_param_1": "$browser"}, - ), - ), - ( - ["$browser", "$browser_version"], - "events", - "prop", - "properties", - ( - "array(replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_1)s), '^\"|\"$', ''),replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_2)s), '^\"|\"$', '')) AS prop", - {"breakdown_param_1": 
"$browser", "breakdown_param_2": "$browser_version"}, - ), - ), -] - - -@pytest.mark.django_db -@pytest.mark.parametrize("breakdown, table, query_alias, column, expected", TEST_BREAKDOWN_PROCESSING) -def test_breakdown_query_expression( - clean_up_materialised_columns, - breakdown: Union[str, list[str]], - table: TableWithProperties, - query_alias: Literal["prop", "value"], - column: str, - expected: str, -): - actual = get_single_or_multi_property_string_expr(breakdown, table, query_alias, column) - - assert actual == expected - - -TEST_BREAKDOWN_PROCESSING_MATERIALIZED = [ - ( - ["$browser"], - "events", - "value", - "properties", - "person_properties", - ( - "array(replaceRegexpAll(JSONExtractRaw(properties, %(breakdown_param_1)s), '^\"|\"$', '')) AS value", - {"breakdown_param_1": "$browser"}, - ), - ('array("mat_pp_$browser") AS value', {"breakdown_param_1": "$browser"}), - ) -] - - -@pytest.mark.django_db -@pytest.mark.parametrize( - "breakdown, table, query_alias, column, materialise_column, expected_with, expected_without", - TEST_BREAKDOWN_PROCESSING_MATERIALIZED, -) -def test_breakdown_query_expression_materialised( - clean_up_materialised_columns, - breakdown: Union[str, list[str]], - table: TableWithProperties, - query_alias: Literal["prop", "value"], - column: str, - materialise_column: str, - expected_with: str, - expected_without: str, -): - from posthog.models.team import util - - util.can_enable_actor_on_events = True - - materialize(table, breakdown[0], table_column="properties") - actual = get_single_or_multi_property_string_expr( - breakdown, - table, - query_alias, - column, - materialised_table_column=materialise_column, - ) - assert actual == expected_with - - materialize(table, breakdown[0], table_column=materialise_column) # type: ignore - actual = get_single_or_multi_property_string_expr( - breakdown, - table, - query_alias, - column, - materialised_table_column=materialise_column, - ) - - assert actual == expected_without - - -@pytest.fixture -def test_events(db, team) -> list[UUID]: - return [ - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - properties={"email": "test@posthog.com"}, - group2_properties={"email": "test@posthog.com"}, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - properties={"email": "mongo@example.com"}, - group2_properties={"email": "mongo@example.com"}, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - properties={"attr": "some_val"}, - group2_properties={"attr": "some_val"}, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - properties={"attr": "50"}, - group2_properties={"attr": "50"}, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - properties={"attr": 5}, - group2_properties={"attr": 5}, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - # unix timestamp in seconds - properties={"unix_timestamp": int(datetime(2021, 4, 1, 18).timestamp())}, - group2_properties={"unix_timestamp": int(datetime(2021, 4, 1, 18).timestamp())}, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - # unix timestamp in seconds - properties={"unix_timestamp": int(datetime(2021, 4, 1, 19).timestamp())}, - group2_properties={"unix_timestamp": int(datetime(2021, 4, 1, 19).timestamp())}, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - properties={"long_date": f"{datetime(2021, 4, 1, 18):%Y-%m-%d 
%H:%M:%S%z}"}, - group2_properties={"long_date": f"{datetime(2021, 4, 1, 18):%Y-%m-%d %H:%M:%S%z}"}, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - properties={"long_date": f"{datetime(2021, 4, 1, 19):%Y-%m-%d %H:%M:%S%z}"}, - group2_properties={"long_date": f"{datetime(2021, 4, 1, 19):%Y-%m-%d %H:%M:%S%z}"}, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - properties={"short_date": f"{datetime(2021, 4, 4):%Y-%m-%d}"}, - group2_properties={"short_date": f"{datetime(2021, 4, 4):%Y-%m-%d}"}, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - properties={"short_date": f"{datetime(2021, 4, 6):%Y-%m-%d}"}, - group2_properties={"short_date": f"{datetime(2021, 4, 6):%Y-%m-%d}"}, - ), - # unix timestamp in seconds with fractions of a second - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - properties={"sdk_$time": 1639427152.339}, - group2_properties={"sdk_$time": 1639427152.339}, - ), - # unix timestamp in milliseconds - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - properties={"unix_timestamp_milliseconds": 1641977394339}, - group2_properties={"unix_timestamp_milliseconds": 1641977394339}, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - properties={"rfc_822_time": "Wed, 02 Oct 2002 15:00:00 +0200"}, - group2_properties={"rfc_822_time": "Wed, 02 Oct 2002 15:00:00 +0200"}, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - properties={"iso_8601_$time": f"{datetime(2021, 4, 1, 19):%Y-%m-%dT%H:%M:%S%Z}"}, - group2_properties={"iso_8601_$time": f"{datetime(2021, 4, 1, 19):%Y-%m-%dT%H:%M:%S%Z}"}, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - properties={"full_date_increasing_$time": f"{datetime(2021, 4, 1, 19):%d-%m-%Y %H:%M:%S}"}, - group2_properties={"full_date_increasing_$time": f"{datetime(2021, 4, 1, 19):%d-%m-%Y %H:%M:%S}"}, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - properties={"with_slashes_$time": f"{datetime(2021, 4, 1, 19):%Y/%m/%d %H:%M:%S}"}, - group2_properties={"with_slashes_$time": f"{datetime(2021, 4, 1, 19):%Y/%m/%d %H:%M:%S}"}, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - properties={"with_slashes_increasing_$time": f"{datetime(2021, 4, 1, 19):%d/%m/%Y %H:%M:%S}"}, - group2_properties={"with_slashes_increasing_$time": f"{datetime(2021, 4, 1, 19):%d/%m/%Y %H:%M:%S}"}, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - # seven digit unix timestamp in seconds - 7840800 - # Clickhouse cannot parse this. 
It isn't matched in tests from TEST_PROPERTIES - properties={"unix_timestamp": int(datetime(1970, 4, 1, 18).timestamp())}, - group2_properties={"unix_timestamp": int(datetime(1970, 4, 1, 18).timestamp())}, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - # nine digit unix timestamp in seconds - 323460000 - properties={"unix_timestamp": int(datetime(1980, 4, 1, 18).timestamp())}, - group2_properties={"unix_timestamp": int(datetime(1980, 4, 1, 18).timestamp())}, - ), - _create_event( - # matched by exact date test - event="$pageview", - team=team, - distinct_id="whatever", - properties={"date_only": f"{datetime(2021, 4, 1):%d/%m/%Y}"}, - group2_properties={"date_only": f"{datetime(2021, 4, 1):%d/%m/%Y}"}, - ), - _create_event( - # should not be matched by exact date test - event="$pageview", - team=team, - distinct_id="whatever", - properties={"date_only": f"{datetime(2021, 4, 1, 11):%d/%m/%Y}"}, - group2_properties={"date_only": f"{datetime(2021, 4, 1, 11):%d/%m/%Y}"}, - ), - _create_event( - # not matched by exact date test - event="$pageview", - team=team, - distinct_id="whatever", - properties={"date_only": f"{datetime(2021, 4, 2):%d/%m/%Y}"}, - group2_properties={"date_only": f"{datetime(2021, 4, 2):%d/%m/%Y}"}, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - properties={"date_only_matched_against_date_and_time": f"{datetime(2021, 3, 31, 18):%d/%m/%Y %H:%M:%S}"}, - group2_properties={ - "date_only_matched_against_date_and_time": f"{datetime(2021, 3, 31, 18):%d/%m/%Y %H:%M:%S}" - }, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - properties={"date_only_matched_against_date_and_time": int(datetime(2021, 3, 31, 14).timestamp())}, - group2_properties={"date_only_matched_against_date_and_time": int(datetime(2021, 3, 31, 14).timestamp())}, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - # include milliseconds, to prove they're ignored in the query - properties={ - "date_exact_including_seconds_and_milliseconds": f"{datetime(2021, 3, 31, 18, 12, 12, 12):%d/%m/%Y %H:%M:%S.%f}" - }, - group2_properties={ - "date_exact_including_seconds_and_milliseconds": f"{datetime(2021, 3, 31, 18, 12, 12, 12):%d/%m/%Y %H:%M:%S.%f}" - }, - ), - _create_event( - event="$pageview", - team=team, - distinct_id="whatever", - # include milliseconds, to prove they don't cause a date to be included in an after filter - properties={ - "date_exact_including_seconds_and_milliseconds": f"{datetime(2021, 3, 31, 23, 59, 59, 12):%d/%m/%Y %H:%M:%S.%f}" - }, - group2_properties={ - "date_exact_including_seconds_and_milliseconds": f"{datetime(2021, 3, 31, 23, 59, 59, 12):%d/%m/%Y %H:%M:%S.%f}" - }, - ), - ] - - -@pytest.fixture -def clean_up_materialised_columns(): - try: - yield - finally: - # after test cleanup - cleanup_materialized_columns() - - -TEST_PROPERTIES = [ - pytest.param(Property(key="email", value="test@posthog.com"), [0]), - pytest.param(Property(key="email", value="test@posthog.com", operator="exact"), [0]), - pytest.param( - Property( - key="email", - value=["pineapple@pizza.com", "mongo@example.com"], - operator="exact", - ), - [1], - ), - pytest.param( - Property(key="attr", value="5"), - [4], - id="matching a number only matches event index 4 from test_events", - ), - pytest.param( - Property(key="email", value="test@posthog.com", operator="is_not"), - range(1, 27), - id="matching on email is not a value matches all but the first event from test_events", - ), - 
pytest.param( - Property( - key="email", - value=["test@posthog.com", "mongo@example.com"], - operator="is_not", - ), - range(2, 27), - id="matching on email is not a value matches all but the first two events from test_events", - ), - pytest.param(Property(key="email", value=r".*est@.*", operator="regex"), [0]), - pytest.param(Property(key="email", value=r"?.", operator="regex"), []), - pytest.param(Property(key="email", operator="is_set", value="is_set"), [0, 1]), - pytest.param( - Property(key="email", operator="is_not_set", value="is_not_set"), - range(2, 27), - id="matching for email property not being set matches all but the first two events from test_events", - ), - pytest.param( - Property(key="unix_timestamp", operator="is_date_before", value="2021-04-02"), - [5, 6, 19], - id="matching before a unix timestamp only querying by date", - ), - pytest.param( - Property(key="unix_timestamp", operator="is_date_after", value="2021-03-31"), - [5, 6], - id="matching after a unix timestamp only querying by date", - ), - pytest.param( - Property(key="unix_timestamp", operator="is_date_before", value="2021-04-01 18:30:00"), - [5, 19], - id="matching before a unix timestamp querying by date and time", - ), - pytest.param( - Property(key="unix_timestamp", operator="is_date_after", value="2021-04-01 18:30:00"), - [6], - id="matching after a unix timestamp querying by date and time", - ), - pytest.param(Property(key="long_date", operator="is_date_before", value="2021-04-02"), [7, 8]), - pytest.param( - Property(key="long_date", operator="is_date_after", value="2021-03-31"), - [7, 8], - id="match after date only value against date and time formatted property", - ), - pytest.param( - Property(key="long_date", operator="is_date_before", value="2021-04-01 18:30:00"), - [7], - ), - pytest.param( - Property(key="long_date", operator="is_date_after", value="2021-04-01 18:30:00"), - [8], - ), - pytest.param(Property(key="short_date", operator="is_date_before", value="2021-04-05"), [9]), - pytest.param(Property(key="short_date", operator="is_date_after", value="2021-04-05"), [10]), - pytest.param( - Property(key="short_date", operator="is_date_before", value="2021-04-07"), - [9, 10], - ), - pytest.param( - Property(key="short_date", operator="is_date_after", value="2021-04-03"), - [9, 10], - ), - pytest.param( - Property(key="sdk_$time", operator="is_date_before", value="2021-12-25"), - [11], - id="matching a unix timestamp in seconds with fractional seconds after the decimal point", - ), - pytest.param( - Property( - key="unix_timestamp_milliseconds", - operator="is_date_after", - value="2022-01-11", - ), - [12], - id="matching unix timestamp in milliseconds after a given date (which ClickHouse doesn't support)", - ), - pytest.param( - Property( - key="unix_timestamp_milliseconds", - operator="is_date_before", - value="2022-01-13", - ), - [12], - id="matching unix timestamp in milliseconds before a given date (which ClickHouse doesn't support)", - ), - pytest.param( - Property(key="rfc_822_time", operator="is_date_before", value="2002-10-02 17:01:00"), - [13], - id="matching rfc 822 format date with timeszone offset before a given date", - ), - pytest.param( - Property(key="rfc_822_time", operator="is_date_after", value="2002-10-02 14:59:00"), - [], - id="matching rfc 822 format date takes into account timeszone offset after a given date", - ), - pytest.param( - Property(key="rfc_822_time", operator="is_date_after", value="2002-10-02 12:59:00"), - [13], - id="matching rfc 822 format date after a given 
date", - ), - pytest.param( - Property(key="iso_8601_$time", operator="is_date_before", value="2021-04-01 20:00:00"), - [14], - id="matching ISO 8601 format date before a given date", - ), - pytest.param( - Property(key="iso_8601_$time", operator="is_date_after", value="2021-04-01 18:00:00"), - [14], - id="matching ISO 8601 format date after a given date", - ), - pytest.param( - Property( - key="full_date_increasing_$time", - operator="is_date_before", - value="2021-04-01 20:00:00", - ), - [15], - id="matching full format date with date parts n increasing order before a given date", - ), - pytest.param( - Property( - key="full_date_increasing_$time", - operator="is_date_after", - value="2021-04-01 18:00:00", - ), - [15], - id="matching full format date with date parts in increasing order after a given date", - ), - pytest.param( - Property( - key="with_slashes_$time", - operator="is_date_before", - value="2021-04-01 20:00:00", - ), - [16], - id="matching full format date with date parts separated by slashes before a given date", - ), - pytest.param( - Property( - key="with_slashes_$time", - operator="is_date_after", - value="2021-04-01 18:00:00", - ), - [16], - id="matching full format date with date parts separated by slashes after a given date", - ), - pytest.param( - Property( - key="with_slashes_increasing_$time", - operator="is_date_before", - value="2021-04-01 20:00:00", - ), - [17], - id="matching full format date with date parts increasing in size and separated by slashes before a given date", - ), - pytest.param( - Property( - key="with_slashes_increasing_$time", - operator="is_date_after", - value="2021-04-01 18:00:00", - ), - [17], - id="matching full format date with date parts increasing in size and separated by slashes after a given date", - ), - pytest.param( - Property(key="date_only", operator="is_date_exact", value="2021-04-01"), - [20, 21], - id="can match dates exactly", - ), - pytest.param( - Property( - key="date_only_matched_against_date_and_time", - operator="is_date_exact", - value="2021-03-31", - ), - [23, 24], - id="can match dates exactly against datetimes and unix timestamps", - ), - pytest.param( - Property( - key="date_exact_including_seconds_and_milliseconds", - operator="is_date_exact", - value="2021-03-31 18:12:12", - ), - [25], - id="can match date times exactly against datetimes with milliseconds", - ), - pytest.param( - Property( - key="date_exact_including_seconds_and_milliseconds", - operator="is_date_after", - value="2021-03-31", - ), - [], - id="can match date only filter after against datetime with milliseconds", - ), - pytest.param( - Property(key="date_only", operator="is_date_after", value="2021-04-01"), - [22], - id="can match after date only values", - ), - pytest.param( - Property(key="date_only", operator="is_date_before", value="2021-04-02"), - [20, 21], - id="can match before date only values", - ), -] - - -@pytest.mark.parametrize("property,expected_event_indexes", TEST_PROPERTIES) -@freeze_time("2021-04-01T01:00:00.000Z") -def test_prop_filter_json_extract(test_events, clean_up_materialised_columns, property, expected_event_indexes, team): - query, params = prop_filter_json_extract(property, 0, allow_denormalized_props=False) - uuids = sorted( - [ - str(uuid) - for (uuid,) in sync_execute( - f"SELECT uuid FROM events WHERE team_id = %(team_id)s {query}", - {"team_id": team.pk, **params}, - ) - ] - ) - expected = sorted([test_events[index] for index in expected_event_indexes]) - - assert len(uuids) == len(expected) # helpful when 
diagnosing assertion failure below - assert uuids == expected - - -@pytest.mark.parametrize("property,expected_event_indexes", TEST_PROPERTIES) -@freeze_time("2021-04-01T01:00:00.000Z") -def test_prop_filter_json_extract_materialized( - test_events, clean_up_materialised_columns, property, expected_event_indexes, team -): - materialize("events", property.key) - - query, params = prop_filter_json_extract(property, 0, allow_denormalized_props=True) - - assert "JSONExtract" not in query - - uuids = sorted( - [ - str(uuid) - for (uuid,) in sync_execute( - f"SELECT uuid FROM events WHERE team_id = %(team_id)s {query}", - {"team_id": team.pk, **params}, - ) - ] - ) - expected = sorted([test_events[index] for index in expected_event_indexes]) - - assert uuids == expected - - -@pytest.mark.parametrize("property,expected_event_indexes", TEST_PROPERTIES) -@freeze_time("2021-04-01T01:00:00.000Z") -def test_prop_filter_json_extract_person_on_events_materialized( - test_events, clean_up_materialised_columns, property, expected_event_indexes, team -): - if not get_instance_setting("PERSON_ON_EVENTS_ENABLED"): - return - - # simulates a group property being materialised - materialize("events", property.key, table_column="group2_properties") - - query, params = prop_filter_json_extract(property, 0, allow_denormalized_props=True) - # this query uses the `properties` column, thus the materialized column is different. - assert ("JSON" in query) or ("AND 1 = 2" == query) - - query, params = prop_filter_json_extract( - property, 0, allow_denormalized_props=True, use_event_column="group2_properties" - ) - assert "JSON" not in query - - uuids = sorted( - [ - str(uuid) - for (uuid,) in sync_execute( - f"SELECT uuid FROM events WHERE team_id = %(team_id)s {query}", - {"team_id": team.pk, **params}, - ) - ] - ) - expected = sorted([test_events[index] for index in expected_event_indexes]) - - assert uuids == expected - - -def test_combine_group_properties(): - propertyA = Property(key="a", operator="exact", value=["a", "b", "c"]) - propertyB = Property(key="b", operator="exact", value=["d", "e", "f"]) - propertyC = Property(key="c", operator="exact", value=["g", "h", "i"]) - propertyD = Property(key="d", operator="exact", value=["j", "k", "l"]) - - property_group = PropertyGroup(PropertyOperatorType.OR, [propertyA, propertyB]) - - combined_group = property_group.combine_properties(PropertyOperatorType.AND, [propertyC, propertyD]) - assert combined_group.to_dict() == { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "a", - "operator": "exact", - "value": ["a", "b", "c"], - "type": "event", - }, - { - "key": "b", - "operator": "exact", - "value": ["d", "e", "f"], - "type": "event", - }, - ], - }, - { - "type": "AND", - "values": [ - { - "key": "c", - "operator": "exact", - "value": ["g", "h", "i"], - "type": "event", - }, - { - "key": "d", - "operator": "exact", - "value": ["j", "k", "l"], - "type": "event", - }, - ], - }, - ], - } - - combined_group = property_group.combine_properties(PropertyOperatorType.OR, [propertyC, propertyD]) - assert combined_group.to_dict() == { - "type": "OR", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "a", - "operator": "exact", - "value": ["a", "b", "c"], - "type": "event", - }, - { - "key": "b", - "operator": "exact", - "value": ["d", "e", "f"], - "type": "event", - }, - ], - }, - { - "type": "AND", - "values": [ - { - "key": "c", - "operator": "exact", - "value": ["g", "h", "i"], - "type": "event", - }, - { - "key": "d", - "operator": 
"exact", - "value": ["j", "k", "l"], - "type": "event", - }, - ], - }, - ], - } - - combined_group = property_group.combine_properties(PropertyOperatorType.OR, []) - assert combined_group.to_dict() == { - "type": "OR", - "values": [ - { - "key": "a", - "operator": "exact", - "value": ["a", "b", "c"], - "type": "event", - }, - { - "key": "b", - "operator": "exact", - "value": ["d", "e", "f"], - "type": "event", - }, - ], - } - - combined_group = PropertyGroup(PropertyOperatorType.AND, cast(list[Property], [])).combine_properties( - PropertyOperatorType.OR, [propertyC, propertyD] - ) - assert combined_group.to_dict() == { - "type": "AND", - "values": [ - { - "key": "c", - "operator": "exact", - "value": ["g", "h", "i"], - "type": "event", - }, - { - "key": "d", - "operator": "exact", - "value": ["j", "k", "l"], - "type": "event", - }, - ], - } - - -def test_session_property_validation(): - # Property key not valid for type session - with pytest.raises(ValidationError): - filter = Filter( - data={ - "properties": [ - { - "type": "session", - "key": "some_prop", - "value": 0, - "operator": "gt", - } - ] - } - ) - parse_prop_grouped_clauses( - team_id=1, - property_group=filter.property_groups, - hogql_context=filter.hogql_context, - ) - - # Operator not valid for $session_duration - with pytest.raises(ValidationError): - filter = Filter( - data={ - "properties": [ - { - "type": "session", - "key": "$session_duration", - "value": 0, - "operator": "is_set", - } - ] - } - ) - parse_prop_grouped_clauses( - team_id=1, - property_group=filter.property_groups, - hogql_context=filter.hogql_context, - ) - - # Value not valid for $session_duration - with pytest.raises(ValidationError): - filter = Filter( - data={ - "properties": [ - { - "type": "session", - "key": "$session_duration", - "value": "hey", - "operator": "gt", - } - ] - } - ) - parse_prop_grouped_clauses( - team_id=1, - property_group=filter.property_groups, - hogql_context=filter.hogql_context, - ) - - # Valid property values - filter = Filter( - data={ - "properties": [ - { - "type": "session", - "key": "$session_duration", - "value": "100", - "operator": "gt", - } - ] - } - ) - parse_prop_grouped_clauses( - team_id=1, - property_group=filter.property_groups, - hogql_context=filter.hogql_context, - ) diff --git a/ee/clickhouse/models/test/utils/util.py b/ee/clickhouse/models/test/utils/util.py deleted file mode 100644 index 6194a6a6a9..0000000000 --- a/ee/clickhouse/models/test/utils/util.py +++ /dev/null @@ -1,14 +0,0 @@ -from time import sleep, time - -from posthog.client import sync_execute - - -# this normally is unnecessary as CH is fast to consume from Kafka when testing -# but it helps prevent potential flakiness -def delay_until_clickhouse_consumes_from_kafka(table_name: str, target_row_count: int, timeout_seconds=10) -> None: - ts_start = time() - while time() < ts_start + timeout_seconds: - result = sync_execute(f"SELECT COUNT(1) FROM {table_name}") - if result[0][0] == target_row_count: - return - sleep(0.5) diff --git a/ee/clickhouse/queries/__init__.py b/ee/clickhouse/queries/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/clickhouse/queries/column_optimizer.py b/ee/clickhouse/queries/column_optimizer.py deleted file mode 100644 index df7873a7ce..0000000000 --- a/ee/clickhouse/queries/column_optimizer.py +++ /dev/null @@ -1,115 +0,0 @@ -from collections import Counter as TCounter -from typing import cast - -from posthog.constants import TREND_FILTER_TYPE_ACTIONS, FunnelCorrelationType -from 
posthog.models.action.util import get_action_tables_and_properties -from posthog.models.filters.mixins.utils import cached_property -from posthog.models.filters.properties_timeline_filter import PropertiesTimelineFilter -from posthog.models.filters.stickiness_filter import StickinessFilter -from posthog.models.filters.utils import GroupTypeIndex -from posthog.models.property import PropertyIdentifier -from posthog.models.property.util import ( - box_value, - count_hogql_properties, - extract_tables_and_properties, -) -from posthog.queries.column_optimizer.foss_column_optimizer import FOSSColumnOptimizer -from posthog.queries.trends.util import is_series_group_based - - -class EnterpriseColumnOptimizer(FOSSColumnOptimizer): - @cached_property - def group_types_to_query(self) -> set[GroupTypeIndex]: - used_properties = self.used_properties_with_type("group") - return {cast(GroupTypeIndex, group_type_index) for _, _, group_type_index in used_properties} - - @cached_property - def properties_used_in_filter(self) -> TCounter[PropertyIdentifier]: - "Returns collection of properties + types that this query would use" - counter: TCounter[PropertyIdentifier] = extract_tables_and_properties(self.filter.property_groups.flat) - - if not isinstance(self.filter, StickinessFilter): - # Some breakdown types read properties - # - # See ee/clickhouse/queries/trends/breakdown.py#get_query or - # ee/clickhouse/queries/breakdown_props.py#get_breakdown_prop_values - if self.filter.breakdown_type in ["event", "person"]: - boxed_breakdown = box_value(self.filter.breakdown) - for b in boxed_breakdown: - if isinstance(b, str): - counter[ - ( - b, - self.filter.breakdown_type, - self.filter.breakdown_group_type_index, - ) - ] += 1 - elif self.filter.breakdown_type == "group": - # :TRICKY: We only support string breakdown for group properties - assert isinstance(self.filter.breakdown, str) - counter[ - ( - self.filter.breakdown, - self.filter.breakdown_type, - self.filter.breakdown_group_type_index, - ) - ] += 1 - elif self.filter.breakdown_type == "hogql": - if isinstance(self.filter.breakdown, list): - expr = str(self.filter.breakdown[0]) - else: - expr = str(self.filter.breakdown) - counter = count_hogql_properties(expr, counter) - - # If we have a breakdowns attribute then make sure we pull in everything we - # need to calculate it - for breakdown in self.filter.breakdowns or []: - if breakdown["type"] == "hogql": - counter = count_hogql_properties(breakdown["property"], counter) - else: - counter[ - ( - breakdown["property"], - breakdown["type"], - self.filter.breakdown_group_type_index, - ) - ] += 1 - - # Both entities and funnel exclusions can contain nested property filters - for entity in self.entities_used_in_filter(): - counter += extract_tables_and_properties(entity.property_groups.flat) - - # Math properties are also implicitly used. 
- # - # See posthog/queries/trends/util.py#process_math - if entity.math_property: - counter[(entity.math_property, "event", None)] += 1 - - # If groups are involved, they're also used - # - # See posthog/queries/trends/util.py#process_math - if is_series_group_based(entity): - counter[(f"$group_{entity.math_group_type_index}", "event", None)] += 1 - - if entity.math == "unique_session": - counter[(f"$session_id", "event", None)] += 1 - - # :TRICKY: If action contains property filters, these need to be included - # - # See ee/clickhouse/models/action.py#format_action_filter for an example - if entity.type == TREND_FILTER_TYPE_ACTIONS: - counter += get_action_tables_and_properties(entity.get_action()) - - if ( - not isinstance(self.filter, StickinessFilter | PropertiesTimelineFilter) - and self.filter.correlation_type == FunnelCorrelationType.PROPERTIES - and self.filter.correlation_property_names - ): - if self.filter.aggregation_group_type_index is not None: - for prop_value in self.filter.correlation_property_names: - counter[(prop_value, "group", self.filter.aggregation_group_type_index)] += 1 - else: - for prop_value in self.filter.correlation_property_names: - counter[(prop_value, "person", None)] += 1 - - return counter diff --git a/ee/clickhouse/queries/enterprise_cohort_query.py b/ee/clickhouse/queries/enterprise_cohort_query.py deleted file mode 100644 index 0629c6757a..0000000000 --- a/ee/clickhouse/queries/enterprise_cohort_query.py +++ /dev/null @@ -1,422 +0,0 @@ -from typing import Any, cast - -from posthog.constants import PropertyOperatorType -from posthog.models.cohort.util import get_count_operator -from posthog.models.filters.mixins.utils import cached_property -from posthog.models.property.property import Property, PropertyGroup -from posthog.queries.foss_cohort_query import ( - FOSSCohortQuery, - parse_and_validate_positive_integer, - validate_entity, - validate_interval, - validate_seq_date_more_recent_than_date, -) -from posthog.queries.util import PersonPropertiesMode -from posthog.schema import PersonsOnEventsMode - - -def check_negation_clause(prop: PropertyGroup) -> tuple[bool, bool]: - has_negation_clause = False - has_primary_clase = False - if len(prop.values): - if isinstance(prop.values[0], PropertyGroup): - for p in cast(list[PropertyGroup], prop.values): - has_neg, has_primary = check_negation_clause(p) - has_negation_clause = has_negation_clause or has_neg - has_primary_clase = has_primary_clase or has_primary - - else: - for property in cast(list[Property], prop.values): - if property.negation: - has_negation_clause = True - else: - has_primary_clase = True - - if prop.type == PropertyOperatorType.AND and has_negation_clause and has_primary_clase: - # this negation is valid, since all conditions are met. - # So, we don't need to pair this with anything in the rest of the tree - # return no negations, and yes to primary clauses - return False, True - - return has_negation_clause, has_primary_clase - - -class EnterpriseCohortQuery(FOSSCohortQuery): - def get_query(self) -> tuple[str, dict[str, Any]]: - if not self._outer_property_groups: - # everything is pushed down, no behavioral stuff to do - # thus, use personQuery directly - return self._person_query.get_query(prepend=self._cohort_pk) - - # TODO: clean up this kludge. 
Right now, get_conditions has to run first so that _fields is populated for _get_behavioral_subquery() - conditions, condition_params = self._get_conditions() - self.params.update(condition_params) - - subq = [] - - if self.sequence_filters_to_query: - ( - sequence_query, - sequence_params, - sequence_query_alias, - ) = self._get_sequence_query() - subq.append((sequence_query, sequence_query_alias)) - self.params.update(sequence_params) - else: - ( - behavior_subquery, - behavior_subquery_params, - behavior_query_alias, - ) = self._get_behavior_subquery() - subq.append((behavior_subquery, behavior_query_alias)) - self.params.update(behavior_subquery_params) - - person_query, person_params, person_query_alias = self._get_persons_query(prepend=str(self._cohort_pk)) - subq.append((person_query, person_query_alias)) - self.params.update(person_params) - - # Since we can FULL OUTER JOIN, we may end up with pairs of uuids where one side is blank. Always try to choose the non blank ID - q, fields = self._build_sources(subq) - - # optimize_aggregation_in_order slows down this query but massively decreases memory usage - # this is fine for offline cohort calculation - final_query = f""" - SELECT {fields} AS id FROM - {q} - WHERE 1 = 1 - {conditions} - SETTINGS optimize_aggregation_in_order = 1, join_algorithm = 'auto' - """ - - return final_query, self.params - - def _get_condition_for_property(self, prop: Property, prepend: str, idx: int) -> tuple[str, dict[str, Any]]: - res: str = "" - params: dict[str, Any] = {} - - if prop.type == "behavioral": - if prop.value == "performed_event": - res, params = self.get_performed_event_condition(prop, prepend, idx) - elif prop.value == "performed_event_multiple": - res, params = self.get_performed_event_multiple(prop, prepend, idx) - elif prop.value == "stopped_performing_event": - res, params = self.get_stopped_performing_event(prop, prepend, idx) - elif prop.value == "restarted_performing_event": - res, params = self.get_restarted_performing_event(prop, prepend, idx) - elif prop.value == "performed_event_first_time": - res, params = self.get_performed_event_first_time(prop, prepend, idx) - elif prop.value == "performed_event_sequence": - res, params = self.get_performed_event_sequence(prop, prepend, idx) - elif prop.value == "performed_event_regularly": - res, params = self.get_performed_event_regularly(prop, prepend, idx) - elif prop.type == "person": - res, params = self.get_person_condition(prop, prepend, idx) - elif ( - prop.type == "static-cohort" - ): # "cohort" and "precalculated-cohort" are handled by flattening during initialization - res, params = self.get_static_cohort_condition(prop, prepend, idx) - else: - raise ValueError(f"Invalid property type for Cohort queries: {prop.type}") - - return res, params - - def get_stopped_performing_event(self, prop: Property, prepend: str, idx: int) -> tuple[str, dict[str, Any]]: - event = (prop.event_type, prop.key) - column_name = f"stopped_event_condition_{prepend}_{idx}" - - entity_query, entity_params = self._get_entity(event, prepend, idx) - date_value = parse_and_validate_positive_integer(prop.time_value, "time_value") - date_param = f"{prepend}_date_{idx}" - date_interval = validate_interval(prop.time_interval) - - seq_date_value = parse_and_validate_positive_integer(prop.seq_time_value, "time_value") - seq_date_param = f"{prepend}_seq_date_{idx}" - seq_date_interval = validate_interval(prop.seq_time_interval) - - validate_seq_date_more_recent_than_date((seq_date_value, seq_date_interval), 
(date_value, date_interval)) - - self._check_earliest_date((date_value, date_interval)) - - # The user was doing the event in this time period - event_was_happening_period = f"countIf(timestamp > now() - INTERVAL %({date_param})s {date_interval} AND timestamp <= now() - INTERVAL %({seq_date_param})s {seq_date_interval} AND {entity_query})" - # Then stopped in this time period - event_stopped_period = f"countIf(timestamp > now() - INTERVAL %({seq_date_param})s {seq_date_interval} AND timestamp <= now() AND {entity_query})" - - full_condition = f"({event_was_happening_period} > 0 AND {event_stopped_period} = 0) as {column_name}" - - self._fields.append(full_condition) - - return ( - f"{'NOT' if prop.negation else ''} {column_name}", - { - f"{date_param}": date_value, - f"{seq_date_param}": seq_date_value, - **entity_params, - }, - ) - - def get_restarted_performing_event(self, prop: Property, prepend: str, idx: int) -> tuple[str, dict[str, Any]]: - event = (prop.event_type, prop.key) - column_name = f"restarted_event_condition_{prepend}_{idx}" - - entity_query, entity_params = self._get_entity(event, prepend, idx) - date_value = parse_and_validate_positive_integer(prop.time_value, "time_value") - date_param = f"{prepend}_date_{idx}" - date_interval = validate_interval(prop.time_interval) - - seq_date_value = parse_and_validate_positive_integer(prop.seq_time_value, "time_value") - seq_date_param = f"{prepend}_seq_date_{idx}" - seq_date_interval = validate_interval(prop.seq_time_interval) - - validate_seq_date_more_recent_than_date((seq_date_value, seq_date_interval), (date_value, date_interval)) - - self._restrict_event_query_by_time = False - - # Events should have been fired in the initial_period - initial_period = f"countIf(timestamp <= now() - INTERVAL %({date_param})s {date_interval} AND {entity_query})" - # Then stopped in the event_stopped_period - event_stopped_period = f"countIf(timestamp > now() - INTERVAL %({date_param})s {date_interval} AND timestamp <= now() - INTERVAL %({seq_date_param})s {seq_date_interval} AND {entity_query})" - # Then restarted in the final event_restart_period - event_restarted_period = f"countIf(timestamp > now() - INTERVAL %({seq_date_param})s {seq_date_interval} AND timestamp <= now() AND {entity_query})" - - full_condition = ( - f"({initial_period} > 0 AND {event_stopped_period} = 0 AND {event_restarted_period} > 0) as {column_name}" - ) - - self._fields.append(full_condition) - - return ( - f"{'NOT' if prop.negation else ''} {column_name}", - { - f"{date_param}": date_value, - f"{seq_date_param}": seq_date_value, - **entity_params, - }, - ) - - def get_performed_event_first_time(self, prop: Property, prepend: str, idx: int) -> tuple[str, dict[str, Any]]: - event = (prop.event_type, prop.key) - entity_query, entity_params = self._get_entity(event, prepend, idx) - - column_name = f"first_time_condition_{prepend}_{idx}" - - date_value = parse_and_validate_positive_integer(prop.time_value, "time_value") - date_param = f"{prepend}_date_{idx}" - date_interval = validate_interval(prop.time_interval) - - self._restrict_event_query_by_time = False - - field = f"minIf(timestamp, {entity_query}) >= now() - INTERVAL %({date_param})s {date_interval} AND minIf(timestamp, {entity_query}) < now() as {column_name}" - - self._fields.append(field) - - return ( - f"{'NOT' if prop.negation else ''} {column_name}", - {f"{date_param}": date_value, **entity_params}, - ) - - def get_performed_event_regularly(self, prop: Property, prepend: str, idx: int) -> tuple[str, dict[str, 
Any]]: - event = (prop.event_type, prop.key) - entity_query, entity_params = self._get_entity(event, prepend, idx) - - column_name = f"performed_event_regularly_{prepend}_{idx}" - - date_interval = validate_interval(prop.time_interval) - - time_value_param = f"{prepend}_time_value_{idx}" - time_value = parse_and_validate_positive_integer(prop.time_value, "time_value") - - operator_value_param = f"{prepend}_operator_value_{idx}" - operator_value = parse_and_validate_positive_integer(prop.operator_value, "operator_value") - - min_periods_param = f"{prepend}_min_periods_{idx}" - min_period_count = parse_and_validate_positive_integer(prop.min_periods, "min_periods") - - total_period_count = parse_and_validate_positive_integer(prop.total_periods, "total_periods") - - if min_period_count > total_period_count: - raise ( - ValueError( - f"min_periods ({min_period_count}) cannot be greater than total_periods ({total_period_count})" - ) - ) - - params = { - time_value_param: time_value, - operator_value_param: operator_value, - min_periods_param: min_period_count, - } - periods = [] - - if total_period_count: - for period in range(total_period_count): - start_time_value = f"%({time_value_param})s * {period}" - end_time_value = f"%({time_value_param})s * ({period} + 1)" - # Clause that returns 1 if the event was performed the expected number of times in the given time interval, otherwise 0 - periods.append( - f"if(countIf({entity_query} and timestamp <= now() - INTERVAL {start_time_value} {date_interval} and timestamp > now() - INTERVAL {end_time_value} {date_interval}) {get_count_operator(prop.operator)} %({operator_value_param})s, 1, 0)" - ) - earliest_date = (total_period_count * time_value, date_interval) - self._check_earliest_date(earliest_date) - - field = "+".join(periods) + f">= %({min_periods_param})s" + f" as {column_name}" - - self._fields.append(field) - - return ( - f"{'NOT' if prop.negation else ''} {column_name}", - {**entity_params, **params}, - ) - - @cached_property - def sequence_filters_to_query(self) -> list[Property]: - props = [] - for prop in self._filter.property_groups.flat: - if prop.value == "performed_event_sequence": - props.append(prop) - return props - - @cached_property - def sequence_filters_lookup(self) -> dict[str, str]: - lookup = {} - for idx, prop in enumerate(self.sequence_filters_to_query): - lookup[str(prop.to_dict())] = f"{idx}" - return lookup - - def _get_sequence_query(self) -> tuple[str, dict[str, Any], str]: - params = {} - - materialized_columns = list(self._column_optimizer.event_columns_to_query) - names = [ - "event", - "properties", - "distinct_id", - "timestamp", - *materialized_columns, - ] - - person_prop_query = "" - person_prop_params: dict = {} - - _inner_fields = [f"{self._person_id_alias} AS person_id"] - _intermediate_fields = ["person_id"] - _outer_fields = ["person_id"] - - _inner_fields.extend(names) - _intermediate_fields.extend(names) - - for idx, prop in enumerate(self.sequence_filters_to_query): - ( - step_cols, - intermediate_cols, - aggregate_cols, - seq_params, - ) = self._get_sequence_filter(prop, idx) - _inner_fields.extend(step_cols) - _intermediate_fields.extend(intermediate_cols) - _outer_fields.extend(aggregate_cols) - params.update(seq_params) - - date_condition, date_params = self._get_date_condition() - params.update(date_params) - - event_param_name = f"{self._cohort_pk}_event_ids" - - if self.should_pushdown_persons and self._person_on_events_mode != PersonsOnEventsMode.DISABLED: - person_prop_query, 
person_prop_params = self._get_prop_groups( - self._inner_property_groups, - person_properties_mode=PersonPropertiesMode.DIRECT_ON_EVENTS, - person_id_joined_alias=self._person_id_alias, - ) - - new_query = f""" - SELECT {", ".join(_inner_fields)} FROM events AS {self.EVENT_TABLE_ALIAS} - {self._get_person_ids_query()} - WHERE team_id = %(team_id)s - AND event IN %({event_param_name})s - {date_condition} - {person_prop_query} - """ - - intermediate_query = f""" - SELECT {", ".join(_intermediate_fields)} FROM ({new_query}) - """ - - _outer_fields.extend(self._fields) - - outer_query = f""" - SELECT {", ".join(_outer_fields)} FROM ({intermediate_query}) - GROUP BY person_id - """ - return ( - outer_query, - { - "team_id": self._team_id, - event_param_name: self._events, - **params, - **person_prop_params, - }, - self.FUNNEL_QUERY_ALIAS, - ) - - def _get_sequence_filter(self, prop: Property, idx: int) -> tuple[list[str], list[str], list[str], dict[str, Any]]: - event = validate_entity((prop.event_type, prop.key)) - entity_query, entity_params = self._get_entity(event, f"event_sequence_{self._cohort_pk}", idx) - seq_event = validate_entity((prop.seq_event_type, prop.seq_event)) - - seq_entity_query, seq_entity_params = self._get_entity(seq_event, f"seq_event_sequence_{self._cohort_pk}", idx) - - time_value = parse_and_validate_positive_integer(prop.time_value, "time_value") - time_interval = validate_interval(prop.time_interval) - seq_date_value = parse_and_validate_positive_integer(prop.seq_time_value, "time_value") - seq_date_interval = validate_interval(prop.seq_time_interval) - self._check_earliest_date((time_value, time_interval)) - - event_prepend = f"event_{idx}" - - duplicate_event = 0 - if event == seq_event: - duplicate_event = 1 - - aggregate_cols = [] - aggregate_condition = f"{'NOT' if prop.negation else ''} max(if({entity_query} AND {event_prepend}_latest_0 < {event_prepend}_latest_1 AND {event_prepend}_latest_1 <= {event_prepend}_latest_0 + INTERVAL {seq_date_value} {seq_date_interval}, 2, 1)) = 2 AS {self.SEQUENCE_FIELD_ALIAS}_{self.sequence_filters_lookup[str(prop.to_dict())]}" - aggregate_cols.append(aggregate_condition) - - condition_cols = [] - timestamp_condition = f"min({event_prepend}_latest_1) over (PARTITION by person_id ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND {duplicate_event} PRECEDING) {event_prepend}_latest_1" - condition_cols.append(f"{event_prepend}_latest_0") - condition_cols.append(timestamp_condition) - - step_cols = [] - step_cols.append( - f"if({entity_query} AND timestamp > now() - INTERVAL {time_value} {time_interval}, 1, 0) AS {event_prepend}_step_0" - ) - step_cols.append(f"if({event_prepend}_step_0 = 1, timestamp, null) AS {event_prepend}_latest_0") - - step_cols.append( - f"if({seq_entity_query} AND timestamp > now() - INTERVAL {time_value} {time_interval}, 1, 0) AS {event_prepend}_step_1" - ) - step_cols.append(f"if({event_prepend}_step_1 = 1, timestamp, null) AS {event_prepend}_latest_1") - - return ( - step_cols, - condition_cols, - aggregate_cols, - { - **entity_params, - **seq_entity_params, - }, - ) - - def get_performed_event_sequence(self, prop: Property, prepend: str, idx: int) -> tuple[str, dict[str, Any]]: - return ( - f"{self.SEQUENCE_FIELD_ALIAS}_{self.sequence_filters_lookup[str(prop.to_dict())]}", - {}, - ) - - # Check if negations are always paired with a positive filter - # raise a value error warning that this is an invalid cohort - def _validate_negations(self) -> None: - has_pending_negation, has_primary_clause 
= check_negation_clause(self._filter.property_groups) - if has_pending_negation: - raise ValueError("Negations must be paired with a positive filter.") diff --git a/ee/clickhouse/queries/event_query.py b/ee/clickhouse/queries/event_query.py deleted file mode 100644 index 64f08da69d..0000000000 --- a/ee/clickhouse/queries/event_query.py +++ /dev/null @@ -1,71 +0,0 @@ -from typing import Optional, Union - -from ee.clickhouse.queries.column_optimizer import EnterpriseColumnOptimizer -from ee.clickhouse.queries.groups_join_query import GroupsJoinQuery -from posthog.clickhouse.materialized_columns import ColumnName -from posthog.models.filters.filter import Filter -from posthog.models.filters.path_filter import PathFilter -from posthog.models.filters.properties_timeline_filter import PropertiesTimelineFilter -from posthog.models.filters.retention_filter import RetentionFilter -from posthog.models.filters.stickiness_filter import StickinessFilter -from posthog.models.property import PropertyName -from posthog.models.team import Team -from posthog.queries.event_query.event_query import EventQuery -from posthog.schema import PersonsOnEventsMode - - -class EnterpriseEventQuery(EventQuery): - _column_optimizer: EnterpriseColumnOptimizer - - def __init__( - self, - filter: Union[ - Filter, - PathFilter, - RetentionFilter, - StickinessFilter, - PropertiesTimelineFilter, - ], - team: Team, - round_interval=False, - should_join_distinct_ids=False, - should_join_persons=False, - # Extra events/person table columns to fetch since parent query needs them - extra_fields: Optional[list[ColumnName]] = None, - extra_event_properties: Optional[list[PropertyName]] = None, - extra_person_fields: Optional[list[ColumnName]] = None, - override_aggregate_users_by_distinct_id: Optional[bool] = None, - person_on_events_mode: PersonsOnEventsMode = PersonsOnEventsMode.DISABLED, - **kwargs, - ) -> None: - if extra_person_fields is None: - extra_person_fields = [] - if extra_event_properties is None: - extra_event_properties = [] - if extra_fields is None: - extra_fields = [] - super().__init__( - filter=filter, - team=team, - round_interval=round_interval, - should_join_distinct_ids=should_join_distinct_ids, - should_join_persons=should_join_persons, - extra_fields=extra_fields, - extra_event_properties=extra_event_properties, - extra_person_fields=extra_person_fields, - override_aggregate_users_by_distinct_id=override_aggregate_users_by_distinct_id, - person_on_events_mode=person_on_events_mode, - **kwargs, - ) - - self._column_optimizer = EnterpriseColumnOptimizer(self._filter, self._team_id) - - def _get_groups_query(self) -> tuple[str, dict]: - if isinstance(self._filter, PropertiesTimelineFilter): - raise Exception("Properties Timeline never needs groups query") - return GroupsJoinQuery( - self._filter, - self._team_id, - self._column_optimizer, - person_on_events_mode=self._person_on_events_mode, - ).get_join_query() diff --git a/ee/clickhouse/queries/experiments/__init__.py b/ee/clickhouse/queries/experiments/__init__.py deleted file mode 100644 index 89f0035201..0000000000 --- a/ee/clickhouse/queries/experiments/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# The FF variant name for control -CONTROL_VARIANT_KEY = "control" - -# controls minimum number of people to be exposed to a variant -# before the results are deemed significant -FF_DISTRIBUTION_THRESHOLD = 100 - -# If probability of a variant is below this threshold, it will be considered -# insignificant -MIN_PROBABILITY_FOR_SIGNIFICANCE = 0.9 - -# Trends only: 
If p-value is below this threshold, the results are considered significant -P_VALUE_SIGNIFICANCE_LEVEL = 0.05 - -CONTROL_VARIANT_KEY = "control" diff --git a/ee/clickhouse/queries/experiments/funnel_experiment_result.py b/ee/clickhouse/queries/experiments/funnel_experiment_result.py deleted file mode 100644 index f68816ed3b..0000000000 --- a/ee/clickhouse/queries/experiments/funnel_experiment_result.py +++ /dev/null @@ -1,193 +0,0 @@ -from dataclasses import asdict, dataclass -from datetime import datetime -import json -from typing import Optional -from zoneinfo import ZoneInfo - -from rest_framework.exceptions import ValidationError - -from posthog.constants import ExperimentNoResultsErrorKeys -from posthog.hogql_queries.experiments import CONTROL_VARIANT_KEY -from posthog.hogql_queries.experiments.funnels_statistics import ( - are_results_significant, - calculate_credible_intervals, - calculate_probabilities, -) -from posthog.models.experiment import ExperimentHoldout -from posthog.models.feature_flag import FeatureFlag -from posthog.models.filters.filter import Filter -from posthog.models.team import Team -from posthog.queries.funnels import ClickhouseFunnel -from posthog.schema import ExperimentSignificanceCode - -Probability = float - - -@dataclass(frozen=True) -class Variant: - key: str - success_count: int - failure_count: int - - -class ClickhouseFunnelExperimentResult: - """ - This class calculates Experiment Results. - It returns two things: - 1. A Funnel Breakdown based on Feature Flag values - 2. Probability that Feature Flag value 1 has a better conversion rate than Feature Flag value 2 - - Currently, we support a maximum of 10 feature flag values: control and 9 test variants - - The passed in Filter determines which funnel to create, along with the experiment start & end date values - - Calculating (2) uses sampling from a Beta distribution. If `control` value for the feature flag has 10 successes and 12 conversion failures, - we assume the conversion rate follows a Beta(10, 12) distribution. Same for `test` variant. - - Then, we calculate how many times a sample from `test` variant is higher than a sample from the `control` variant. This becomes the - probability. - """ - - def __init__( - self, - filter: Filter, - team: Team, - feature_flag: FeatureFlag, - experiment_start_date: datetime, - experiment_end_date: Optional[datetime] = None, - holdout: Optional[ExperimentHoldout] = None, - funnel_class: type[ClickhouseFunnel] = ClickhouseFunnel, - ): - breakdown_key = f"$feature/{feature_flag.key}" - self.variants = [variant["key"] for variant in feature_flag.variants] - if holdout: - self.variants.append(f"holdout-{holdout.id}") - - # our filters assume that the given time ranges are in the project timezone. - # while start and end date are in UTC. - # so we need to convert them to the project timezone - if team.timezone: - start_date_in_project_timezone = experiment_start_date.astimezone(ZoneInfo(team.timezone)) - end_date_in_project_timezone = ( - experiment_end_date.astimezone(ZoneInfo(team.timezone)) if experiment_end_date else None - ) - - query_filter = filter.shallow_clone( - { - "date_from": start_date_in_project_timezone, - "date_to": end_date_in_project_timezone, - "explicit_date": True, - "breakdown": breakdown_key, - "breakdown_type": "event", - "properties": [], - # :TRICKY: We don't use properties set on filters, as these - # correspond to feature flag properties, not the funnel properties. 
- # This is also why we simplify only right now so new properties (from test account filters) - # are added appropriately. - "is_simplified": False, - } - ) - self.funnel = funnel_class(query_filter, team) - - def get_results(self, validate: bool = True): - funnel_results = self.funnel.run() - - basic_result_props = { - # TODO: check if this can error out or not?, i.e. results don't have 0 index? - "insight": [result for result in funnel_results if result[0]["breakdown_value"][0] in self.variants], - "filters": self.funnel._filter.to_dict(), - } - - try: - validate_event_variants(funnel_results, self.variants) - - filtered_results = [result for result in funnel_results if result[0]["breakdown_value"][0] in self.variants] - - control_variant, test_variants = self.get_variants(filtered_results) - - probabilities = calculate_probabilities(control_variant, test_variants) - - mapping = { - variant.key: probability - for variant, probability in zip([control_variant, *test_variants], probabilities) - } - - significance_code, loss = are_results_significant(control_variant, test_variants, probabilities) - - credible_intervals = calculate_credible_intervals([control_variant, *test_variants]) - except ValidationError: - if validate: - raise - else: - return basic_result_props - - return { - **basic_result_props, - "probability": mapping, - "significant": significance_code == ExperimentSignificanceCode.SIGNIFICANT, - "significance_code": significance_code, - "expected_loss": loss, - "variants": [asdict(variant) for variant in [control_variant, *test_variants]], - "credible_intervals": credible_intervals, - } - - def get_variants(self, funnel_results): - control_variant = None - test_variants = [] - for result in funnel_results: - total = result[0]["count"] - success = result[-1]["count"] - failure = total - success - breakdown_value = result[0]["breakdown_value"][0] - if breakdown_value == CONTROL_VARIANT_KEY: - control_variant = Variant( - key=breakdown_value, - success_count=int(success), - failure_count=int(failure), - ) - else: - test_variants.append(Variant(breakdown_value, int(success), int(failure))) - - return control_variant, test_variants - - -def validate_event_variants(funnel_results, variants): - errors = { - ExperimentNoResultsErrorKeys.NO_EVENTS: True, - ExperimentNoResultsErrorKeys.NO_FLAG_INFO: True, - ExperimentNoResultsErrorKeys.NO_CONTROL_VARIANT: True, - ExperimentNoResultsErrorKeys.NO_TEST_VARIANT: True, - } - - if not funnel_results or not funnel_results[0]: - raise ValidationError(code="no-results", detail=json.dumps(errors)) - - errors[ExperimentNoResultsErrorKeys.NO_EVENTS] = False - - # Funnels: the first step must be present for *any* results to show up - eventsWithOrderZero = [] - for eventArr in funnel_results: - for event in eventArr: - if event.get("order") == 0: - eventsWithOrderZero.append(event) - - # Check if "control" is present - for event in eventsWithOrderZero: - event_variant = event.get("breakdown_value")[0] - if event_variant == "control": - errors[ExperimentNoResultsErrorKeys.NO_CONTROL_VARIANT] = False - errors[ExperimentNoResultsErrorKeys.NO_FLAG_INFO] = False - break - - # Check if at least one of the test variants is present - test_variants = [variant for variant in variants if variant != "control"] - for event in eventsWithOrderZero: - event_variant = event.get("breakdown_value")[0] - if event_variant in test_variants: - errors[ExperimentNoResultsErrorKeys.NO_TEST_VARIANT] = False - errors[ExperimentNoResultsErrorKeys.NO_FLAG_INFO] = False - break - - 
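
[Editor's note] The ClickhouseFunnelExperimentResult docstring above only describes the win-probability calculation in words: draw a conversion-rate sample per variant from a Beta distribution and count how often the test sample beats the control sample. The sketch below is purely illustrative and not part of the deleted module: it uses only the standard library, the helper name is ours, and it ignores whatever prior the real calculate_probabilities applies.

from random import betavariate

def sketch_probability_test_beats_control(
    control_success: int,
    control_failure: int,
    test_success: int,
    test_failure: int,
    samples: int = 100_000,
) -> float:
    # Fraction of Monte Carlo draws in which the sampled test conversion rate
    # exceeds the sampled control conversion rate, following the Beta model
    # described in the docstring above (illustrative sketch only).
    wins = 0
    for _ in range(samples):
        control_rate = betavariate(control_success, control_failure)
        test_rate = betavariate(test_success, test_failure)
        if test_rate > control_rate:
            wins += 1
    return wins / samples

For the 100/18 control versus 100/10 test split used in the funnel experiment tests further down, this lands in the same region as the ~0.918 probability those tests assert, though the exact value depends on the prior the real implementation uses.
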
has_errors = any(errors.values()) - if has_errors: - raise ValidationError(detail=json.dumps(errors)) diff --git a/ee/clickhouse/queries/experiments/secondary_experiment_result.py b/ee/clickhouse/queries/experiments/secondary_experiment_result.py deleted file mode 100644 index bd485c4362..0000000000 --- a/ee/clickhouse/queries/experiments/secondary_experiment_result.py +++ /dev/null @@ -1,84 +0,0 @@ -from datetime import datetime -from typing import Optional - -from rest_framework.exceptions import ValidationError -from ee.clickhouse.queries.experiments.funnel_experiment_result import ClickhouseFunnelExperimentResult -from ee.clickhouse.queries.experiments.trend_experiment_result import ( - ClickhouseTrendExperimentResult, - uses_math_aggregation_by_user_or_property_value, -) - -from posthog.constants import INSIGHT_FUNNELS, INSIGHT_TRENDS -from posthog.models.feature_flag import FeatureFlag -from posthog.models.filters.filter import Filter -from posthog.models.team import Team - - -class ClickhouseSecondaryExperimentResult: - """ - This class calculates secondary metric values for Experiments. - It returns value of metric for each variant. - - We adjust the metric filter based on Experiment parameters. - """ - - def __init__( - self, - filter: Filter, - team: Team, - feature_flag: FeatureFlag, - experiment_start_date: datetime, - experiment_end_date: Optional[datetime] = None, - ): - self.variants = [variant["key"] for variant in feature_flag.variants] - self.team = team - self.feature_flag = feature_flag - self.filter = filter - self.experiment_start_date = experiment_start_date - self.experiment_end_date = experiment_end_date - - def get_results(self): - if self.filter.insight == INSIGHT_TRENDS: - significance_results = ClickhouseTrendExperimentResult( - self.filter, self.team, self.feature_flag, self.experiment_start_date, self.experiment_end_date - ).get_results(validate=False) - variants = self.get_trend_count_data_for_variants(significance_results["insight"]) - - elif self.filter.insight == INSIGHT_FUNNELS: - significance_results = ClickhouseFunnelExperimentResult( - self.filter, self.team, self.feature_flag, self.experiment_start_date, self.experiment_end_date - ).get_results(validate=False) - variants = self.get_funnel_conversion_rate_for_variants(significance_results["insight"]) - - else: - raise ValidationError("Secondary metrics need to be funnel or trend insights") - - return {"result": variants, **significance_results} - - def get_funnel_conversion_rate_for_variants(self, insight_results) -> dict[str, float]: - variants = {} - for result in insight_results: - total = result[0]["count"] - success = result[-1]["count"] - breakdown_value = result[0]["breakdown_value"][0] - - if breakdown_value in self.variants: - variants[breakdown_value] = round(int(success) / int(total), 3) - - return variants - - def get_trend_count_data_for_variants(self, insight_results) -> dict[str, float]: - # this assumes the Trend insight is Cumulative, unless using count per user - variants = {} - - for result in insight_results: - count = result["count"] - breakdown_value = result["breakdown_value"] - - if uses_math_aggregation_by_user_or_property_value(self.filter): - count = result["count"] / len(result.get("data", [0])) - - if breakdown_value in self.variants: - variants[breakdown_value] = count - - return variants diff --git a/ee/clickhouse/queries/experiments/test_funnel_experiment_result.py b/ee/clickhouse/queries/experiments/test_funnel_experiment_result.py deleted file mode 100644 index 
55fca255ed..0000000000 --- a/ee/clickhouse/queries/experiments/test_funnel_experiment_result.py +++ /dev/null @@ -1,561 +0,0 @@ -import unittest -from functools import lru_cache -from math import exp, lgamma, log, ceil - -from flaky import flaky - -from posthog.hogql_queries.experiments.funnels_statistics import ( - are_results_significant, - calculate_expected_loss, - calculate_probabilities, - calculate_credible_intervals as calculate_funnel_credible_intervals, -) -from posthog.schema import ExperimentSignificanceCode, ExperimentVariantFunnelsBaseStats - -Probability = float - - -@lru_cache(maxsize=100000) -def logbeta(x: int, y: int) -> float: - return lgamma(x) + lgamma(y) - lgamma(x + y) - - -# Helper function to calculate probability using a different method than the one used in actual code -# calculation: https://www.evanmiller.org/bayesian-ab-testing.html#binary_ab - - -def calculate_probability_of_winning_for_target( - target_variant: ExperimentVariantFunnelsBaseStats, other_variants: list[ExperimentVariantFunnelsBaseStats] -) -> Probability: - """ - Calculates the probability of winning for target variant. - """ - target = target_variant.success_count + 1, target_variant.failure_count + 1 - variants = [(variant.success_count + 1, variant.failure_count + 1) for variant in other_variants] - - if len(variants) == 1: - # simple case - return probability_B_beats_A(variants[0][0], variants[0][1], target[0], target[1]) - - elif len(variants) == 2: - return probability_C_beats_A_and_B( - variants[0][0], - variants[0][1], - variants[1][0], - variants[1][1], - target[0], - target[1], - ) - - elif len(variants) == 3: - return probability_D_beats_A_B_and_C( - variants[0][0], - variants[0][1], - variants[1][0], - variants[1][1], - variants[2][0], - variants[2][1], - target[0], - target[1], - ) - else: - return 0 - - -def probability_B_beats_A(A_success: float, A_failure: float, B_success: float, B_failure: float) -> Probability: - total: Probability = 0 - for i in range(ceil(B_success)): - total += exp( - logbeta(A_success + i, A_failure + B_failure) - - log(B_failure + i) - - logbeta(1 + i, B_failure) - - logbeta(A_success, A_failure) - ) - - return total - - -def probability_C_beats_A_and_B( - A_success: float, - A_failure: float, - B_success: float, - B_failure: float, - C_success: float, - C_failure: float, -): - total: Probability = 0 - for i in range(ceil(A_success)): - for j in range(ceil(B_success)): - total += exp( - logbeta(C_success + i + j, C_failure + A_failure + B_failure) - - log(A_failure + i) - - log(B_failure + j) - - logbeta(1 + i, A_failure) - - logbeta(1 + j, B_failure) - - logbeta(C_success, C_failure) - ) - - return ( - 1 - - probability_B_beats_A(C_success, C_failure, A_success, A_failure) - - probability_B_beats_A(C_success, C_failure, B_success, B_failure) - + total - ) - - -def probability_D_beats_A_B_and_C( - A_success: float, - A_failure: float, - B_success: float, - B_failure: float, - C_success: float, - C_failure: float, - D_success: float, - D_failure: float, -): - total: Probability = 0 - for i in range(ceil(A_success)): - for j in range(ceil(B_success)): - for k in range(ceil(C_success)): - total += exp( - logbeta( - D_success + i + j + k, - D_failure + A_failure + B_failure + C_failure, - ) - - log(A_failure + i) - - log(B_failure + j) - - log(C_failure + k) - - logbeta(1 + i, A_failure) - - logbeta(1 + j, B_failure) - - logbeta(1 + k, C_failure) - - logbeta(D_success, D_failure) - ) - - return ( - 1 - - probability_B_beats_A(A_success, A_failure, 
D_success, D_failure) - - probability_B_beats_A(B_success, B_failure, D_success, D_failure) - - probability_B_beats_A(C_success, C_failure, D_success, D_failure) - + probability_C_beats_A_and_B(A_success, A_failure, B_success, B_failure, D_success, D_failure) - + probability_C_beats_A_and_B(A_success, A_failure, C_success, C_failure, D_success, D_failure) - + probability_C_beats_A_and_B(B_success, B_failure, C_success, C_failure, D_success, D_failure) - - total - ) - - -@flaky(max_runs=10, min_passes=1) -class TestFunnelExperimentCalculator(unittest.TestCase): - def test_calculate_results(self): - variant_test = ExperimentVariantFunnelsBaseStats(key="A", success_count=100, failure_count=10) - variant_control = ExperimentVariantFunnelsBaseStats(key="B", success_count=100, failure_count=18) - - _, probability = calculate_probabilities(variant_control, [variant_test]) - self.assertAlmostEqual(probability, 0.918, places=2) - - significant, loss = are_results_significant(variant_control, [variant_test], [probability]) - self.assertAlmostEqual(loss, 0.0016, places=3) - self.assertEqual(significant, ExperimentSignificanceCode.SIGNIFICANT) - - credible_intervals = calculate_funnel_credible_intervals([variant_control, variant_test]) - # Cross-checked with: https://www.causascientia.org/math_stat/ProportionCI.html - self.assertAlmostEqual(credible_intervals[variant_control.key][0], 0.7715, places=3) - self.assertAlmostEqual(credible_intervals[variant_control.key][1], 0.9010, places=3) - self.assertAlmostEqual(credible_intervals[variant_test.key][0], 0.8405, places=3) - self.assertAlmostEqual(credible_intervals[variant_test.key][1], 0.9494, places=3) - - def test_simulation_result_is_close_to_closed_form_solution(self): - variant_test = ExperimentVariantFunnelsBaseStats(key="A", success_count=100, failure_count=10) - variant_control = ExperimentVariantFunnelsBaseStats(key="B", success_count=100, failure_count=18) - - _, probability = calculate_probabilities(variant_control, [variant_test]) - self.assertAlmostEqual(probability, 0.918, places=1) - - alternative_probability = calculate_probability_of_winning_for_target(variant_test, [variant_control]) - self.assertAlmostEqual(probability, alternative_probability, places=1) - - def test_calculate_results_for_two_test_variants(self): - variant_test_1 = ExperimentVariantFunnelsBaseStats(key="A", success_count=100, failure_count=10) - variant_test_2 = ExperimentVariantFunnelsBaseStats(key="B", success_count=100, failure_count=3) - variant_control = ExperimentVariantFunnelsBaseStats(key="C", success_count=100, failure_count=18) - - probabilities = calculate_probabilities(variant_control, [variant_test_1, variant_test_2]) - self.assertAlmostEqual(sum(probabilities), 1) - self.assertAlmostEqual(probabilities[0], 0.0, places=1) - self.assertAlmostEqual(probabilities[1], 0.033, places=1) - self.assertAlmostEqual(probabilities[2], 0.967, places=1) - - alternative_probability_for_control = calculate_probability_of_winning_for_target( - variant_control, [variant_test_1, variant_test_2] - ) - self.assertAlmostEqual(probabilities[0], alternative_probability_for_control, places=2) - - self.assertAlmostEqual( - calculate_expected_loss(variant_test_2, [variant_control, variant_test_1]), - 0.0004, - places=3, - ) - - # this loss only checks variant 2 against control - significant, loss = are_results_significant(variant_control, [variant_test_1, variant_test_2], probabilities) - self.assertAlmostEqual(loss, 0.00000, places=3) - self.assertEqual(significant, 
ExperimentSignificanceCode.SIGNIFICANT) - - credible_intervals = calculate_funnel_credible_intervals([variant_control, variant_test_1, variant_test_2]) - # Cross-checked with: https://www.causascientia.org/math_stat/ProportionCI.html - self.assertAlmostEqual(credible_intervals[variant_control.key][0], 0.7715, places=3) - self.assertAlmostEqual(credible_intervals[variant_control.key][1], 0.9010, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][0], 0.8405, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][1], 0.9494, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][0], 0.9180, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][1], 0.9894, places=3) - - def test_calculate_results_for_two_test_variants_almost_equal(self): - variant_test_1 = ExperimentVariantFunnelsBaseStats(key="A", success_count=120, failure_count=60) - variant_test_2 = ExperimentVariantFunnelsBaseStats(key="B", success_count=110, failure_count=52) - variant_control = ExperimentVariantFunnelsBaseStats(key="C", success_count=130, failure_count=65) - - probabilities = calculate_probabilities(variant_control, [variant_test_1, variant_test_2]) - self.assertAlmostEqual(sum(probabilities), 1) - self.assertAlmostEqual(probabilities[0], 0.277, places=1) - self.assertAlmostEqual(probabilities[1], 0.282, places=1) - self.assertAlmostEqual(probabilities[2], 0.440, places=1) - - alternative_probability_for_control = calculate_probability_of_winning_for_target( - variant_control, [variant_test_1, variant_test_2] - ) - self.assertAlmostEqual(probabilities[0], alternative_probability_for_control, places=1) - - self.assertAlmostEqual( - calculate_expected_loss(variant_test_2, [variant_control, variant_test_1]), - 0.022, - places=2, - ) - - significant, loss = are_results_significant(variant_control, [variant_test_1, variant_test_2], probabilities) - self.assertAlmostEqual(loss, 1, places=3) - self.assertEqual(significant, ExperimentSignificanceCode.LOW_WIN_PROBABILITY) - - credible_intervals = calculate_funnel_credible_intervals([variant_control, variant_test_1, variant_test_2]) - # Cross-checked with: https://www.causascientia.org/math_stat/ProportionCI.html - self.assertAlmostEqual(credible_intervals[variant_control.key][0], 0.5977, places=3) - self.assertAlmostEqual(credible_intervals[variant_control.key][1], 0.7290, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][0], 0.5948, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][1], 0.7314, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][0], 0.6035, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][1], 0.7460, places=3) - - def test_absolute_loss_less_than_one_percent_but_not_significant(self): - variant_test_1 = ExperimentVariantFunnelsBaseStats(key="A", success_count=286, failure_count=2014) - variant_control = ExperimentVariantFunnelsBaseStats(key="B", success_count=267, failure_count=2031) - - probabilities = calculate_probabilities(variant_control, [variant_test_1]) - self.assertAlmostEqual(sum(probabilities), 1) - self.assertAlmostEqual(probabilities[0], 0.197, places=1) - self.assertAlmostEqual(probabilities[1], 0.802, places=1) - - self.assertAlmostEqual(calculate_expected_loss(variant_test_1, [variant_control]), 0.0010, places=3) - - significant, loss = are_results_significant(variant_control, [variant_test_1], probabilities) - self.assertAlmostEqual(loss, 1, places=3) - 
self.assertEqual(significant, ExperimentSignificanceCode.LOW_WIN_PROBABILITY) - - credible_intervals = calculate_funnel_credible_intervals([variant_control, variant_test_1]) - # Cross-checked with: https://www.causascientia.org/math_stat/ProportionCI.html - self.assertAlmostEqual(credible_intervals[variant_control.key][0], 0.1037, places=3) - self.assertAlmostEqual(credible_intervals[variant_control.key][1], 0.1299, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][0], 0.1114, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][1], 0.1384, places=3) - - def test_calculate_results_for_three_test_variants(self): - variant_test_1 = ExperimentVariantFunnelsBaseStats(key="A", success_count=100, failure_count=10) - variant_test_2 = ExperimentVariantFunnelsBaseStats(key="B", success_count=100, failure_count=3) - variant_test_3 = ExperimentVariantFunnelsBaseStats(key="C", success_count=100, failure_count=30) - variant_control = ExperimentVariantFunnelsBaseStats(key="D", success_count=100, failure_count=18) - - probabilities = calculate_probabilities(variant_control, [variant_test_1, variant_test_2, variant_test_3]) - self.assertAlmostEqual(sum(probabilities), 1) - self.assertAlmostEqual(probabilities[0], 0.0, places=1) - self.assertAlmostEqual(probabilities[1], 0.033, places=1) - self.assertAlmostEqual(probabilities[2], 0.967, places=1) - self.assertAlmostEqual(probabilities[3], 0.0, places=1) - - alternative_probability_for_control = calculate_probability_of_winning_for_target( - variant_control, [variant_test_1, variant_test_2, variant_test_3] - ) - - self.assertAlmostEqual(probabilities[0], alternative_probability_for_control, places=1) - - self.assertAlmostEqual( - calculate_expected_loss(variant_test_2, [variant_control, variant_test_1, variant_test_3]), - 0.0004, - places=2, - ) - - significant, loss = are_results_significant( - variant_control, - [variant_test_1, variant_test_2, variant_test_3], - probabilities, - ) - self.assertAlmostEqual(loss, 0.0004, places=2) - self.assertEqual(significant, ExperimentSignificanceCode.SIGNIFICANT) - - credible_intervals = calculate_funnel_credible_intervals( - [variant_control, variant_test_1, variant_test_2, variant_test_3] - ) - # Cross-checked with: https://www.causascientia.org/math_stat/ProportionCI.html - self.assertAlmostEqual(credible_intervals[variant_control.key][0], 0.7715, places=3) - self.assertAlmostEqual(credible_intervals[variant_control.key][1], 0.9010, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][0], 0.8405, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][1], 0.9494, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][0], 0.9180, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][1], 0.9894, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_3.key][0], 0.6894, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_3.key][1], 0.8332, places=3) - - def test_calculate_results_for_three_test_variants_almost_equal(self): - variant_test_1 = ExperimentVariantFunnelsBaseStats(key="A", success_count=120, failure_count=60) - variant_test_2 = ExperimentVariantFunnelsBaseStats(key="B", success_count=110, failure_count=52) - variant_test_3 = ExperimentVariantFunnelsBaseStats(key="C", success_count=100, failure_count=46) - variant_control = ExperimentVariantFunnelsBaseStats(key="D", success_count=130, failure_count=65) - - probabilities = calculate_probabilities(variant_control, 
[variant_test_1, variant_test_2, variant_test_3]) - self.assertAlmostEqual(sum(probabilities), 1) - self.assertAlmostEqual(probabilities[0], 0.168, places=1) - self.assertAlmostEqual(probabilities[1], 0.174, places=1) - self.assertAlmostEqual(probabilities[2], 0.292, places=1) - self.assertAlmostEqual(probabilities[3], 0.365, places=1) - - alternative_probability_for_control = calculate_probability_of_winning_for_target( - variant_control, [variant_test_1, variant_test_2, variant_test_3] - ) - self.assertAlmostEqual(probabilities[0], alternative_probability_for_control, places=1) - - self.assertAlmostEqual( - calculate_expected_loss(variant_test_2, [variant_control, variant_test_1, variant_test_3]), - 0.033, - places=2, - ) - - # passing in artificial probabilities to subvert the low_probability threshold - significant, loss = are_results_significant( - variant_control, [variant_test_1, variant_test_2, variant_test_3], [1, 0] - ) - self.assertAlmostEqual(loss, 0.012, places=2) - self.assertEqual(significant, ExperimentSignificanceCode.HIGH_LOSS) - - credible_intervals = calculate_funnel_credible_intervals( - [variant_control, variant_test_1, variant_test_2, variant_test_3] - ) - # Cross-checked with: https://www.causascientia.org/math_stat/ProportionCI.html - self.assertAlmostEqual(credible_intervals[variant_control.key][0], 0.5977, places=3) - self.assertAlmostEqual(credible_intervals[variant_control.key][1], 0.7290, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][0], 0.5948, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][1], 0.7314, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][0], 0.6035, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][1], 0.7460, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_3.key][0], 0.6054, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_3.key][1], 0.7547, places=3) - - def test_calculate_results_for_three_test_variants_much_better_than_control(self): - variant_test_1 = ExperimentVariantFunnelsBaseStats(key="A", success_count=130, failure_count=60) - variant_test_2 = ExperimentVariantFunnelsBaseStats(key="B", success_count=135, failure_count=62) - variant_test_3 = ExperimentVariantFunnelsBaseStats(key="C", success_count=132, failure_count=60) - variant_control = ExperimentVariantFunnelsBaseStats(key="D", success_count=80, failure_count=65) - - probabilities = calculate_probabilities(variant_control, [variant_test_1, variant_test_2, variant_test_3]) - self.assertAlmostEqual(sum(probabilities), 1) - - alternative_probability_for_control = calculate_probability_of_winning_for_target( - variant_control, [variant_test_1, variant_test_2, variant_test_3] - ) - self.assertAlmostEqual(probabilities[0], alternative_probability_for_control, places=1) - - significant, loss = are_results_significant( - variant_control, - [variant_test_1, variant_test_2, variant_test_3], - probabilities, - ) - self.assertAlmostEqual(loss, 0, places=2) - self.assertEqual(significant, ExperimentSignificanceCode.SIGNIFICANT) - - credible_intervals = calculate_funnel_credible_intervals( - [variant_control, variant_test_1, variant_test_2, variant_test_3] - ) - # Cross-checked with: https://www.causascientia.org/math_stat/ProportionCI.html - self.assertAlmostEqual(credible_intervals[variant_control.key][0], 0.4703, places=3) - self.assertAlmostEqual(credible_intervals[variant_control.key][1], 0.6303, places=3) - 
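The credible intervals asserted throughout these funnel tests can be reproduced independently of calculate_funnel_credible_intervals. A minimal sketch, assuming the intervals are the 2.5%/97.5% quantiles of a Beta(1 + success_count, 1 + failure_count) posterior (the same model as the causascientia calculator the comments cross-check against):

    from scipy.stats import beta

    def beta_credible_interval(success_count: int, failure_count: int) -> tuple[float, float]:
        # Conversion-rate posterior under a flat Beta(1, 1) prior.
        posterior = beta(1 + success_count, 1 + failure_count)
        return posterior.ppf(0.025), posterior.ppf(0.975)

    # The control variant just above (success_count=80, failure_count=65) comes out
    # around (0.470, 0.630), in line with the asserted 0.4703 and 0.6303.
    print(beta_credible_interval(80, 65))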
self.assertAlmostEqual(credible_intervals[variant_test_1.key][0], 0.6148, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][1], 0.7460, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][0], 0.6172, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][1], 0.7460, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_3.key][0], 0.6186, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_3.key][1], 0.7488, places=3) - - def test_calculate_results_for_seven_test_variants(self): - variant_test_1 = ExperimentVariantFunnelsBaseStats(key="A", success_count=100, failure_count=17) - variant_test_2 = ExperimentVariantFunnelsBaseStats(key="B", success_count=100, failure_count=16) - variant_test_3 = ExperimentVariantFunnelsBaseStats(key="C", success_count=100, failure_count=30) - variant_test_4 = ExperimentVariantFunnelsBaseStats(key="D", success_count=100, failure_count=31) - variant_test_5 = ExperimentVariantFunnelsBaseStats(key="E", success_count=100, failure_count=29) - variant_test_6 = ExperimentVariantFunnelsBaseStats(key="F", success_count=100, failure_count=32) - variant_test_7 = ExperimentVariantFunnelsBaseStats(key="G", success_count=100, failure_count=33) - variant_control = ExperimentVariantFunnelsBaseStats(key="H", success_count=100, failure_count=18) - - probabilities = calculate_probabilities( - variant_control, - [ - variant_test_1, - variant_test_2, - variant_test_3, - variant_test_4, - variant_test_5, - variant_test_6, - variant_test_7, - ], - ) - self.assertAlmostEqual(sum(probabilities), 1) - self.assertAlmostEqual(probabilities[0], 0.241, places=1) - self.assertAlmostEqual(probabilities[1], 0.322, places=1) - self.assertAlmostEqual(probabilities[2], 0.425, places=1) - self.assertAlmostEqual(probabilities[3], 0.002, places=2) - self.assertAlmostEqual(probabilities[4], 0.001, places=2) - self.assertAlmostEqual(probabilities[5], 0.004, places=2) - self.assertAlmostEqual(probabilities[6], 0.001, places=2) - self.assertAlmostEqual(probabilities[7], 0.0, places=2) - - self.assertAlmostEqual( - calculate_expected_loss( - variant_test_2, - [ - variant_control, - variant_test_1, - variant_test_3, - variant_test_4, - variant_test_5, - variant_test_6, - variant_test_7, - ], - ), - 0.0208, - places=2, - ) - - significant, loss = are_results_significant( - variant_control, - [ - variant_test_1, - variant_test_2, - variant_test_3, - variant_test_4, - variant_test_5, - variant_test_6, - variant_test_7, - ], - probabilities, - ) - self.assertAlmostEqual(loss, 1, places=2) - self.assertEqual(significant, ExperimentSignificanceCode.LOW_WIN_PROBABILITY) - - credible_intervals = calculate_funnel_credible_intervals( - [ - variant_control, - variant_test_1, - variant_test_2, - variant_test_3, - variant_test_4, - variant_test_5, - variant_test_6, - variant_test_7, - ] - ) - # Cross-checked with: https://www.causascientia.org/math_stat/ProportionCI.html - self.assertAlmostEqual(credible_intervals[variant_control.key][0], 0.7715, places=3) - self.assertAlmostEqual(credible_intervals[variant_control.key][1], 0.9010, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][0], 0.7793, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][1], 0.9070, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][0], 0.7874, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][1], 0.9130, places=3) - 
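The calculate_expected_loss assertions in these multi-variant tests pin down the conversion rate you expect to give up by shipping the target variant when a competitor is actually better. A minimal Monte Carlo sketch of that quantity, assuming Beta(1 + success, 1 + failure) posteriors; the production calculate_expected_loss may use a different estimator:

    import numpy as np

    def expected_loss_mc(target, others, samples=200_000, seed=0):
        # Sample a conversion rate per variant and average how far the best
        # competitor exceeds the target, counting only the draws where it loses.
        rng = np.random.default_rng(seed)
        target_draws = rng.beta(1 + target.success_count, 1 + target.failure_count, samples)
        best_other = np.max(
            [rng.beta(1 + v.success_count, 1 + v.failure_count, samples) for v in others],
            axis=0,
        )
        return float(np.mean(np.maximum(best_other - target_draws, 0.0)))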
self.assertAlmostEqual(credible_intervals[variant_test_3.key][0], 0.6894, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_3.key][1], 0.8332, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_4.key][0], 0.6835, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_4.key][1], 0.8278, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_5.key][0], 0.6955, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_5.key][1], 0.8385, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_6.key][0], 0.6776, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_6.key][1], 0.8226, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_7.key][0], 0.6718, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_7.key][1], 0.8174, places=3) - - def test_calculate_results_control_is_significant(self): - variant_test = ExperimentVariantFunnelsBaseStats(key="test", success_count=100, failure_count=18) - variant_control = ExperimentVariantFunnelsBaseStats(key="control", success_count=100, failure_count=10) - - probabilities = calculate_probabilities(variant_control, [variant_test]) - - self.assertAlmostEqual(probabilities[0], 0.918, places=2) - - significant, loss = are_results_significant(variant_control, [variant_test], probabilities) - - self.assertAlmostEqual(loss, 0.0016, places=3) - self.assertEqual(significant, ExperimentSignificanceCode.SIGNIFICANT) - - credible_intervals = calculate_funnel_credible_intervals([variant_control, variant_test]) - # Cross-checked with: https://www.causascientia.org/math_stat/ProportionCI.html - self.assertAlmostEqual(credible_intervals[variant_control.key][0], 0.8405, places=3) - self.assertAlmostEqual(credible_intervals[variant_control.key][1], 0.9494, places=3) - self.assertAlmostEqual(credible_intervals[variant_test.key][0], 0.7715, places=3) - self.assertAlmostEqual(credible_intervals[variant_test.key][1], 0.9010, places=3) - - def test_calculate_results_many_variants_control_is_significant(self): - variant_test_1 = ExperimentVariantFunnelsBaseStats(key="test_1", success_count=100, failure_count=20) - variant_test_2 = ExperimentVariantFunnelsBaseStats(key="test_2", success_count=100, failure_count=21) - variant_test_3 = ExperimentVariantFunnelsBaseStats(key="test_3", success_count=100, failure_count=22) - variant_test_4 = ExperimentVariantFunnelsBaseStats(key="test_4", success_count=100, failure_count=23) - variant_test_5 = ExperimentVariantFunnelsBaseStats(key="test_5", success_count=100, failure_count=24) - variant_test_6 = ExperimentVariantFunnelsBaseStats(key="test_6", success_count=100, failure_count=25) - variant_control = ExperimentVariantFunnelsBaseStats(key="control", success_count=100, failure_count=10) - - variants_test = [ - variant_test_1, - variant_test_2, - variant_test_3, - variant_test_4, - variant_test_5, - variant_test_6, - ] - - probabilities = calculate_probabilities(variant_control, variants_test) - - self.assertAlmostEqual(probabilities[0], 0.901, places=2) - - significant, loss = are_results_significant(variant_control, variants_test, probabilities) - - self.assertAlmostEqual(loss, 0.0008, places=3) - self.assertEqual(significant, ExperimentSignificanceCode.SIGNIFICANT) - - credible_intervals = calculate_funnel_credible_intervals( - [ - variant_control, - variant_test_1, - variant_test_2, - variant_test_3, - variant_test_4, - variant_test_5, - variant_test_6, - ] - ) - # Cross-checked with: 
https://www.causascientia.org/math_stat/ProportionCI.html - self.assertAlmostEqual(credible_intervals[variant_control.key][0], 0.8405, places=3) - self.assertAlmostEqual(credible_intervals[variant_control.key][1], 0.9494, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][0], 0.7563, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][1], 0.8892, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][0], 0.7489, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][1], 0.8834, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_3.key][0], 0.7418, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_3.key][1], 0.8776, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_4.key][0], 0.7347, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_4.key][1], 0.8718, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_5.key][0], 0.7279, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_5.key][1], 0.8661, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_6.key][0], 0.7211, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_6.key][1], 0.8605, places=3) diff --git a/ee/clickhouse/queries/experiments/test_trend_experiment_result.py b/ee/clickhouse/queries/experiments/test_trend_experiment_result.py deleted file mode 100644 index de983e6f14..0000000000 --- a/ee/clickhouse/queries/experiments/test_trend_experiment_result.py +++ /dev/null @@ -1,240 +0,0 @@ -import unittest -from functools import lru_cache -from math import exp, lgamma, log, ceil - -from flaky import flaky - -from posthog.hogql_queries.experiments.trends_statistics import ( - are_results_significant, - calculate_credible_intervals, - calculate_p_value, - calculate_probabilities, -) -from posthog.schema import ExperimentSignificanceCode, ExperimentVariantTrendsBaseStats - -Probability = float - - -@lru_cache(maxsize=100000) -def logbeta(x: float, y: float) -> float: - return lgamma(x) + lgamma(y) - lgamma(x + y) - - -# Helper function to calculate probability using a different method than the one used in actual code -# calculation: https://www.evanmiller.org/bayesian-ab-testing.html#count_ab -def calculate_probability_of_winning_for_target_count_data( - target_variant: ExperimentVariantTrendsBaseStats, other_variants: list[ExperimentVariantTrendsBaseStats] -) -> Probability: - """ - Calculates the probability of winning for target variant. 
- """ - target = 1 + target_variant.count, target_variant.exposure - variants = [(1 + variant.count, variant.exposure) for variant in other_variants] - - if len(variants) == 1: - # simple case - return probability_B_beats_A_count_data(variants[0][0], variants[0][1], target[0], target[1]) - - elif len(variants) == 2: - return probability_C_beats_A_and_B_count_data( - variants[0][0], - variants[0][1], - variants[1][0], - variants[1][1], - target[0], - target[1], - ) - else: - return 0 - - -def probability_B_beats_A_count_data( - A_count: float, A_exposure: float, B_count: float, B_exposure: float -) -> Probability: - total: Probability = 0 - for i in range(ceil(B_count)): - total += exp( - i * log(B_exposure) - + A_count * log(A_exposure) - - (i + A_count) * log(B_exposure + A_exposure) - - log(i + A_count) - - logbeta(i + 1, A_count) - ) - - return total - - -def probability_C_beats_A_and_B_count_data( - A_count: float, - A_exposure: float, - B_count: float, - B_exposure: float, - C_count: float, - C_exposure: float, -) -> Probability: - total: Probability = 0 - - for i in range(ceil(B_count)): - for j in range(ceil(A_count)): - total += exp( - i * log(B_exposure) - + j * log(A_exposure) - + C_count * log(C_exposure) - - (i + j + C_count) * log(B_exposure + A_exposure + C_exposure) - + lgamma(i + j + C_count) - - lgamma(i + 1) - - lgamma(j + 1) - - lgamma(C_count) - ) - return ( - 1 - - probability_B_beats_A_count_data(C_count, C_exposure, A_count, A_exposure) - - probability_B_beats_A_count_data(C_count, C_exposure, B_count, B_exposure) - + total - ) - - -@flaky(max_runs=10, min_passes=1) -class TestTrendExperimentCalculator(unittest.TestCase): - def test_calculate_results(self): - variant_control = ExperimentVariantTrendsBaseStats(key="A", count=20, exposure=1, absolute_exposure=200) - variant_test = ExperimentVariantTrendsBaseStats(key="B", count=30, exposure=1, absolute_exposure=200) - - probabilities = calculate_probabilities(variant_control, [variant_test]) - self.assertAlmostEqual(probabilities[1], 0.92, places=1) - - computed_probability = calculate_probability_of_winning_for_target_count_data(variant_test, [variant_control]) - self.assertAlmostEqual(probabilities[1], computed_probability, places=1) - - # p value testing matches https://www.evanmiller.org/ab-testing/poisson-means.html - p_value = calculate_p_value(variant_control, [variant_test]) - self.assertAlmostEqual(p_value, 0.20, places=2) - - credible_intervals = calculate_credible_intervals([variant_control, variant_test]) - self.assertAlmostEqual(credible_intervals[variant_control.key][0], 0.0650, places=3) - self.assertAlmostEqual(credible_intervals[variant_control.key][1], 0.1544, places=3) - self.assertAlmostEqual(credible_intervals[variant_test.key][0], 0.1053, places=3) - self.assertAlmostEqual(credible_intervals[variant_test.key][1], 0.2141, places=3) - - def test_calculate_results_small_numbers(self): - variant_control = ExperimentVariantTrendsBaseStats(key="A", count=2, exposure=1, absolute_exposure=200) - variant_test = ExperimentVariantTrendsBaseStats(key="B", count=1, exposure=1, absolute_exposure=200) - - probabilities = calculate_probabilities(variant_control, [variant_test]) - self.assertAlmostEqual(probabilities[1], 0.31, places=1) - - computed_probability = calculate_probability_of_winning_for_target_count_data(variant_test, [variant_control]) - self.assertAlmostEqual(probabilities[1], computed_probability, places=1) - - p_value = calculate_p_value(variant_control, [variant_test]) - 
self.assertAlmostEqual(p_value, 1, places=2) - - credible_intervals = calculate_credible_intervals([variant_control, variant_test]) - self.assertAlmostEqual(credible_intervals[variant_control.key][0], 0.0031, places=3) - self.assertAlmostEqual(credible_intervals[variant_control.key][1], 0.0361, places=3) - self.assertAlmostEqual(credible_intervals[variant_test.key][0], 0.0012, places=3) - self.assertAlmostEqual(credible_intervals[variant_test.key][1], 0.0279, places=3) - - def test_calculate_count_data_probability(self): - probability = probability_B_beats_A_count_data(15, 1, 30, 1) - - # same relative exposure should give same results - probability2 = probability_B_beats_A_count_data(15, 10, 30, 10) - - self.assertAlmostEqual(probability, 0.988, places=1) - self.assertAlmostEqual(probability, probability2) - - def test_calculate_results_with_three_variants(self): - variant_control = ExperimentVariantTrendsBaseStats(key="A", count=20, exposure=1, absolute_exposure=200) - variant_test_1 = ExperimentVariantTrendsBaseStats(key="B", count=26, exposure=1, absolute_exposure=200) - variant_test_2 = ExperimentVariantTrendsBaseStats(key="C", count=19, exposure=1, absolute_exposure=200) - - probabilities = calculate_probabilities(variant_control, [variant_test_1, variant_test_2]) - self.assertAlmostEqual(probabilities[0], 0.16, places=1) - self.assertAlmostEqual(probabilities[1], 0.72, places=1) - self.assertAlmostEqual(probabilities[2], 0.12, places=1) - - computed_probability = calculate_probability_of_winning_for_target_count_data( - variant_control, [variant_test_1, variant_test_2] - ) - self.assertAlmostEqual(probabilities[0], computed_probability, places=1) - - p_value = calculate_p_value(variant_control, [variant_test_1, variant_test_2]) - self.assertAlmostEqual(p_value, 0.46, places=2) - - credible_intervals = calculate_credible_intervals([variant_control, variant_test_1, variant_test_2]) - self.assertAlmostEqual(credible_intervals[variant_control.key][0], 0.0650, places=3) - self.assertAlmostEqual(credible_intervals[variant_control.key][1], 0.1544, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][0], 0.0890, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][1], 0.1905, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][0], 0.0611, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][1], 0.1484, places=3) - - def test_calculate_significance_when_target_variants_underperform(self): - variant_control = ExperimentVariantTrendsBaseStats(key="A", count=250, exposure=1, absolute_exposure=200) - variant_test_1 = ExperimentVariantTrendsBaseStats(key="B", count=180, exposure=1, absolute_exposure=200) - variant_test_2 = ExperimentVariantTrendsBaseStats(key="C", count=50, exposure=1, absolute_exposure=200) - - # in this case, should choose B as best test variant - p_value = calculate_p_value(variant_control, [variant_test_1, variant_test_2]) - self.assertAlmostEqual(p_value, 0.001, places=3) - - # manually assign probabilities to control test case - significant, p_value = are_results_significant( - variant_control, [variant_test_1, variant_test_2], [0.5, 0.4, 0.1] - ) - self.assertAlmostEqual(p_value, 1, places=3) - self.assertEqual(significant, ExperimentSignificanceCode.LOW_WIN_PROBABILITY) - - # new B variant is worse, such that control probability ought to be high enough - variant_test_1 = ExperimentVariantTrendsBaseStats(key="B", count=100, exposure=1, absolute_exposure=200) - - significant, p_value = 
are_results_significant( - variant_control, [variant_test_1, variant_test_2], [0.95, 0.03, 0.02] - ) - self.assertAlmostEqual(p_value, 0, places=3) - self.assertEqual(significant, ExperimentSignificanceCode.SIGNIFICANT) - - credible_intervals = calculate_credible_intervals([variant_control, variant_test_1, variant_test_2]) - self.assertAlmostEqual(credible_intervals[variant_control.key][0], 1.1045, places=3) - self.assertAlmostEqual(credible_intervals[variant_control.key][1], 1.4149, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][0], 0.4113, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][1], 0.6081, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][0], 0.1898, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][1], 0.3295, places=3) - - def test_results_with_different_exposures(self): - variant_control = ExperimentVariantTrendsBaseStats(key="A", count=50, exposure=1.3, absolute_exposure=260) - variant_test_1 = ExperimentVariantTrendsBaseStats(key="B", count=30, exposure=1.8, absolute_exposure=360) - variant_test_2 = ExperimentVariantTrendsBaseStats(key="C", count=20, exposure=0.7, absolute_exposure=140) - - probabilities = calculate_probabilities(variant_control, [variant_test_1, variant_test_2]) # a is control - self.assertAlmostEqual(probabilities[0], 0.86, places=1) - self.assertAlmostEqual(probabilities[1], 0, places=1) - self.assertAlmostEqual(probabilities[2], 0.13, places=1) - - computed_probability = calculate_probability_of_winning_for_target_count_data( - variant_test_1, [variant_control, variant_test_2] - ) - self.assertAlmostEqual(probabilities[1], computed_probability, places=1) - - computed_probability = calculate_probability_of_winning_for_target_count_data( - variant_control, [variant_test_1, variant_test_2] - ) - self.assertAlmostEqual(probabilities[0], computed_probability, places=1) - - p_value = calculate_p_value(variant_control, [variant_test_1, variant_test_2]) - self.assertAlmostEqual(p_value, 0, places=3) - - significant, p_value = are_results_significant(variant_control, [variant_test_1, variant_test_2], probabilities) - self.assertAlmostEqual(p_value, 1, places=3) - # False because max probability is less than 0.9 - self.assertEqual(significant, ExperimentSignificanceCode.LOW_WIN_PROBABILITY) - - credible_intervals = calculate_credible_intervals([variant_control, variant_test_1, variant_test_2]) - self.assertAlmostEqual(credible_intervals[variant_control.key][0], 0.1460, places=3) - self.assertAlmostEqual(credible_intervals[variant_control.key][1], 0.2535, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][0], 0.0585, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_1.key][1], 0.1190, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][0], 0.0929, places=3) - self.assertAlmostEqual(credible_intervals[variant_test_2.key][1], 0.2206, places=3) diff --git a/ee/clickhouse/queries/experiments/test_utils.py b/ee/clickhouse/queries/experiments/test_utils.py deleted file mode 100644 index f01b00d0bb..0000000000 --- a/ee/clickhouse/queries/experiments/test_utils.py +++ /dev/null @@ -1,160 +0,0 @@ -from ee.clickhouse.queries.experiments.utils import requires_flag_warning -from posthog.constants import INSIGHT_FUNNELS -from posthog.models.action.action import Action -from posthog.models.filters.filter import Filter -from posthog.test.base import APIBaseTest, ClickhouseTestMixin -from posthog.test.test_journeys 
import journeys_for - - -class TestUtils(ClickhouseTestMixin, APIBaseTest): - def test_with_no_feature_flag_properties_on_events(self): - journeys_for( - team=self.team, - events_by_person={ - "person1": [ - {"event": "user signed up", "properties": {"$os": "Windows"}}, - ], - "person2": [ - {"event": "user signed up", "properties": {"$os": "Windows"}}, - ], - }, - ) - - filter = Filter( - data={ - "events": [{"id": "user signed up", "type": "events", "order": 0}], - "insight": INSIGHT_FUNNELS, - } - ) - - self.assertTrue(requires_flag_warning(filter, self.team)) - - def test_with_feature_flag_properties_on_events(self): - journeys_for( - team=self.team, - events_by_person={ - "person1": [ - { - "event": "user signed up", - "properties": {"$os": "Windows", "$feature/aloha": "control"}, - }, - ], - "person2": [ - { - "event": "user signed up", - "properties": {"$os": "Windows", "$feature/aloha": "test"}, - }, - ], - }, - ) - - filter = Filter( - data={ - "events": [{"id": "user signed up", "type": "events", "order": 0}], - "insight": INSIGHT_FUNNELS, - } - ) - - self.assertFalse(requires_flag_warning(filter, self.team)) - - def test_with_no_feature_flag_properties_on_actions(self): - action_credit_card = Action.objects.create( - team=self.team, - name="paid", - steps_json=[ - { - "event": "paid", - "properties": [ - { - "key": "$os", - "type": "event", - "value": ["Windows"], - "operator": "exact", - } - ], - }, - { - "event": "$autocapture", - "tag_name": "button", - "text": "Pay $10", - }, - ], - ) - - filter = Filter( - data={ - "events": [{"id": "user signed up", "type": "events", "order": 0}], - "actions": [ - {"id": action_credit_card.pk, "type": "actions", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - } - ) - - journeys_for( - team=self.team, - events_by_person={ - "person1": [ - {"event": "user signed up", "properties": {"$os": "Windows"}}, - {"event": "paid", "properties": {"$os": "Windows"}}, - ], - "person2": [ - {"event": "paid", "properties": {"$os": "Windows"}}, - ], - "person3": [ - {"event": "user signed up", "properties": {"$os": "Windows"}}, - ], - }, - ) - - self.assertTrue(requires_flag_warning(filter, self.team)) - - def test_with_feature_flag_properties_on_actions(self): - action_credit_card = Action.objects.create( - team=self.team, - name="paid", - steps_json=[ - { - "event": "paid", - "properties": [ - { - "key": "$os", - "type": "event", - "value": ["Windows"], - "operator": "exact", - } - ], - } - ], - ) - - filter = Filter( - data={ - "events": [{"id": "user signed up", "type": "events", "order": 0}], - "actions": [ - {"id": action_credit_card.pk, "type": "actions", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - } - ) - - journeys_for( - team=self.team, - events_by_person={ - "person1": [ - {"event": "user signed up", "properties": {"$os": "Windows"}}, - {"event": "paid", "properties": {"$os": "Windows"}}, - ], - "person2": [ - { - "event": "paid", - "properties": {"$os": "Windows", "$feature/aloha": "test"}, - }, - ], - "person3": [ - {"event": "user signed up", "properties": {"$os": "Windows"}}, - ], - }, - ) - - self.assertFalse(requires_flag_warning(filter, self.team)) diff --git a/ee/clickhouse/queries/experiments/trend_experiment_result.py b/ee/clickhouse/queries/experiments/trend_experiment_result.py deleted file mode 100644 index cab803dff9..0000000000 --- a/ee/clickhouse/queries/experiments/trend_experiment_result.py +++ /dev/null @@ -1,362 +0,0 @@ -import json -from dataclasses import asdict, dataclass -from datetime import datetime -from typing 
import Optional -from zoneinfo import ZoneInfo - -from rest_framework.exceptions import ValidationError - -from ee.clickhouse.queries.experiments import ( - CONTROL_VARIANT_KEY, -) -from posthog.constants import ( - ACTIONS, - EVENTS, - TRENDS_CUMULATIVE, - TRENDS_LINEAR, - UNIQUE_USERS, - ExperimentNoResultsErrorKeys, -) -from posthog.hogql_queries.experiments.trends_statistics import ( - are_results_significant, - calculate_credible_intervals, - calculate_probabilities, -) -from posthog.models.experiment import ExperimentHoldout -from posthog.models.feature_flag import FeatureFlag -from posthog.models.filters.filter import Filter -from posthog.models.team import Team -from posthog.queries.trends.trends import Trends -from posthog.queries.trends.util import ALL_SUPPORTED_MATH_FUNCTIONS -from posthog.schema import ExperimentSignificanceCode - -Probability = float - - -@dataclass(frozen=True) -class Variant: - key: str - count: int - # a fractional value, representing the proportion of the variant's exposure events relative to *control* exposure events - # default: the proportion of unique users relative to the *control* unique users - exposure: float - # count of total exposure events exposed for a variant - # default: total number of unique users exposed to the variant (via "Feature flag called" event) - absolute_exposure: int - - -def uses_math_aggregation_by_user_or_property_value(filter: Filter): - # sync with frontend: https://github.com/PostHog/posthog/blob/master/frontend/src/scenes/experiments/experimentLogic.tsx#L662 - # the selector experimentCountPerUserMath - - entities = filter.entities - math_keys = ALL_SUPPORTED_MATH_FUNCTIONS - - # 'sum' doesn't need special handling, we can have custom exposure for sum filters - if "sum" in math_keys: - math_keys.remove("sum") - - return any(entity.math in math_keys for entity in entities) - - -class ClickhouseTrendExperimentResult: - """ - This class calculates Experiment Results. - It returns two things: - 1. A trend Breakdown based on Feature Flag values - 2. Probability that Feature Flag value 1 has better conversion rate then FeatureFlag value 2 - - Currently, it only supports two feature flag values: control and test - - The passed in Filter determines which trend to create, along with the experiment start & end date values - - Calculating (2) uses the formula here: https://www.evanmiller.org/bayesian-ab-testing.html#count_ab - """ - - def __init__( - self, - filter: Filter, - team: Team, - feature_flag: FeatureFlag, - experiment_start_date: datetime, - experiment_end_date: Optional[datetime] = None, - trend_class: type[Trends] = Trends, - custom_exposure_filter: Optional[Filter] = None, - holdout: Optional[ExperimentHoldout] = None, - ): - breakdown_key = f"$feature/{feature_flag.key}" - self.variants = [variant["key"] for variant in feature_flag.variants] - if holdout: - self.variants.append(f"holdout-{holdout.id}") - - # our filters assume that the given time ranges are in the project timezone. - # while start and end date are in UTC. 
- # so we need to convert them to the project timezone - if team.timezone: - start_date_in_project_timezone = experiment_start_date.astimezone(ZoneInfo(team.timezone)) - end_date_in_project_timezone = ( - experiment_end_date.astimezone(ZoneInfo(team.timezone)) if experiment_end_date else None - ) - - uses_math_aggregation = uses_math_aggregation_by_user_or_property_value(filter) - - # Keep in sync with https://github.com/PostHog/posthog/blob/master/frontend/src/scenes/experiments/ExperimentView/components.tsx#L91 - query_filter = filter.shallow_clone( - { - "display": TRENDS_CUMULATIVE if not uses_math_aggregation else TRENDS_LINEAR, - "date_from": start_date_in_project_timezone, - "date_to": end_date_in_project_timezone, - "explicit_date": True, - "breakdown": breakdown_key, - "breakdown_type": "event", - "properties": [ - { - "key": breakdown_key, - "value": self.variants, - "operator": "exact", - "type": "event", - } - ], - # :TRICKY: We don't use properties set on filters, instead using experiment variant options - # :TRICKY: We don't use properties set on filters, as these - # correspond to feature flag properties, not the trend properties. - # This is also why we simplify only right now so new properties (from test account filters) - # are added appropriately. - "is_simplified": False, - } - ) - - if uses_math_aggregation: - # A trend experiment can have only one metric, so take the first one to calculate exposure - # We copy the entity to avoid mutating the original filter - entity = query_filter.shallow_clone({}).entities[0] - # :TRICKY: With count per user aggregation, our exposure filter is implicit: - # (1) We calculate the unique users for this event -> this is the exposure - # (2) We calculate the total count of this event -> this is the trend goal metric / arrival rate for probability calculation - # TODO: When we support group aggregation per user, change this. - entity.math = None - exposure_entity = entity.to_dict() - entity.math = UNIQUE_USERS - count_entity = entity.to_dict() - - target_entities = [exposure_entity, count_entity] - query_filter_actions = [] - query_filter_events = [] - if entity.type == ACTIONS: - query_filter_actions = target_entities - else: - query_filter_events = target_entities - - # two entities in exposure, one for count, the other for result - exposure_filter = query_filter.shallow_clone( - { - "display": TRENDS_CUMULATIVE, - ACTIONS: query_filter_actions, - EVENTS: query_filter_events, - } - ) - - else: - # TODO: Exposure doesn't need to compute daily values, so instead of - # using TRENDS_CUMULATIVE, we can use TRENDS_TABLE to just get the total. - if custom_exposure_filter: - exposure_filter = custom_exposure_filter.shallow_clone( - { - "display": TRENDS_CUMULATIVE, - "date_from": experiment_start_date, - "date_to": experiment_end_date, - "explicit_date": True, - "breakdown": breakdown_key, - "breakdown_type": "event", - "properties": [ - { - "key": breakdown_key, - "value": self.variants, - "operator": "exact", - "type": "event", - } - ], - # :TRICKY: We don't use properties set on filters, as these - # correspond to feature flag properties, not the trend-exposure properties. - # This is also why we simplify only right now so new properties (from test account filters) - # are added appropriately. 
- "is_simplified": False, - } - ) - else: - exposure_filter = filter.shallow_clone( - { - "display": TRENDS_CUMULATIVE, - "date_from": experiment_start_date, - "date_to": experiment_end_date, - "explicit_date": True, - ACTIONS: [], - EVENTS: [ - { - "id": "$feature_flag_called", - "name": "$feature_flag_called", - "order": 0, - "type": "events", - "math": "dau", - } - ], - "breakdown_type": "event", - "breakdown": "$feature_flag_response", - "properties": [ - { - "key": "$feature_flag_response", - "value": self.variants, - "operator": "exact", - "type": "event", - }, - { - "key": "$feature_flag", - "value": [feature_flag.key], - "operator": "exact", - "type": "event", - }, - ], - # :TRICKY: We don't use properties set on filters, as these - # correspond to feature flag properties, not the trend-exposure properties. - # This is also why we simplify only right now so new properties (from test account filters) - # are added appropriately. - "is_simplified": False, - } - ) - - self.query_filter = query_filter - self.exposure_filter = exposure_filter - self.team = team - self.insight = trend_class() - - def get_results(self, validate: bool = True): - insight_results = self.insight.run(self.query_filter, self.team) - exposure_results = self.insight.run(self.exposure_filter, self.team) - - basic_result_props = { - "insight": insight_results, - "filters": self.query_filter.to_dict(), - "exposure_filters": self.exposure_filter.to_dict(), - } - - try: - validate_event_variants(insight_results, self.variants) - - control_variant, test_variants = self.get_variants(insight_results, exposure_results) - - probabilities = calculate_probabilities(control_variant, test_variants) - - mapping = { - variant.key: probability - for variant, probability in zip([control_variant, *test_variants], probabilities) - } - - significance_code, p_value = are_results_significant(control_variant, test_variants, probabilities) - - credible_intervals = calculate_credible_intervals([control_variant, *test_variants]) - except ValidationError: - if validate: - raise - else: - return basic_result_props - - return { - **basic_result_props, - "probability": mapping, - "significant": significance_code == ExperimentSignificanceCode.SIGNIFICANT, - "significance_code": significance_code, - "p_value": p_value, - "variants": [asdict(variant) for variant in [control_variant, *test_variants]], - "credible_intervals": credible_intervals, - } - - def get_variants(self, insight_results, exposure_results): - # this assumes the Trend insight is Cumulative - control_variant = None - test_variants = [] - exposure_counts = {} - exposure_ratios = {} - - # :TRICKY: With count per user aggregation, our exposure filter is implicit: - # (1) We calculate the unique users for this event -> this is the exposure - # (2) We calculate the total count of this event -> this is the trend goal metric / arrival rate for probability calculation - # TODO: When we support group aggregation per user, change this. 
- if uses_math_aggregation_by_user_or_property_value(self.query_filter): - filtered_exposure_results = [ - result for result in exposure_results if result["action"]["math"] == UNIQUE_USERS - ] - filtered_insight_results = [ - result for result in exposure_results if result["action"]["math"] != UNIQUE_USERS - ] - else: - filtered_exposure_results = exposure_results - filtered_insight_results = insight_results - - for result in filtered_exposure_results: - count = result["count"] - breakdown_value = result["breakdown_value"] - exposure_counts[breakdown_value] = count - - control_exposure = exposure_counts.get(CONTROL_VARIANT_KEY, 0) - - if control_exposure != 0: - for key, count in exposure_counts.items(): - exposure_ratios[key] = count / control_exposure - - for result in filtered_insight_results: - count = result["count"] - breakdown_value = result["breakdown_value"] - if breakdown_value == CONTROL_VARIANT_KEY: - # count exposure value is always 1, the baseline - control_variant = Variant( - key=breakdown_value, - count=int(count), - exposure=1, - absolute_exposure=exposure_counts.get(breakdown_value, 1), - ) - else: - test_variants.append( - Variant( - breakdown_value, - int(count), - exposure_ratios.get(breakdown_value, 1), - exposure_counts.get(breakdown_value, 1), - ) - ) - - return control_variant, test_variants - - -def validate_event_variants(trend_results, variants): - errors = { - ExperimentNoResultsErrorKeys.NO_EVENTS: True, - ExperimentNoResultsErrorKeys.NO_FLAG_INFO: True, - ExperimentNoResultsErrorKeys.NO_CONTROL_VARIANT: True, - ExperimentNoResultsErrorKeys.NO_TEST_VARIANT: True, - } - - if not trend_results or not trend_results[0]: - raise ValidationError(code="no-results", detail=json.dumps(errors)) - - errors[ExperimentNoResultsErrorKeys.NO_EVENTS] = False - - # Check if "control" is present - for event in trend_results: - event_variant = event.get("breakdown_value") - if event_variant == "control": - errors[ExperimentNoResultsErrorKeys.NO_CONTROL_VARIANT] = False - errors[ExperimentNoResultsErrorKeys.NO_FLAG_INFO] = False - break - - # Check if at least one of the test variants is present - test_variants = [variant for variant in variants if variant != "control"] - for event in trend_results: - event_variant = event.get("breakdown_value") - if event_variant in test_variants: - errors[ExperimentNoResultsErrorKeys.NO_TEST_VARIANT] = False - errors[ExperimentNoResultsErrorKeys.NO_FLAG_INFO] = False - break - - has_errors = any(errors.values()) - if has_errors: - raise ValidationError(detail=json.dumps(errors)) diff --git a/ee/clickhouse/queries/experiments/utils.py b/ee/clickhouse/queries/experiments/utils.py deleted file mode 100644 index 5837a6aa9d..0000000000 --- a/ee/clickhouse/queries/experiments/utils.py +++ /dev/null @@ -1,68 +0,0 @@ -from typing import Union - -from posthog.client import sync_execute -from posthog.constants import TREND_FILTER_TYPE_ACTIONS -from posthog.models.filters.filter import Filter -from posthog.models.team.team import Team -from posthog.queries.query_date_range import QueryDateRange - - -def requires_flag_warning(filter: Filter, team: Team) -> bool: - date_params = {} - query_date_range = QueryDateRange(filter=filter, team=team, should_round=False) - parsed_date_from, date_from_params = query_date_range.date_from - parsed_date_to, date_to_params = query_date_range.date_to - date_params.update(date_from_params) - date_params.update(date_to_params) - - date_query = f""" - {parsed_date_from} - {parsed_date_to} - """ - - events: set[Union[int, 
str]] = set() - entities_to_use = filter.entities - - for entity in entities_to_use: - if entity.type == TREND_FILTER_TYPE_ACTIONS: - action = entity.get_action() - for step_event in action.get_step_events(): - if step_event: - # TODO: Fix this to detect if "all events" (i.e. None) is in the list and change the entiry query to e.g. AND 1=1 - events.add(step_event) - elif entity.id is not None: - events.add(entity.id) - - entity_query = f"AND event IN %(events_list)s" - entity_params = {"events_list": sorted(events)} - - events_result = sync_execute( - f""" - SELECT - event, - groupArraySample(%(limit)s)(properties) - FROM events - WHERE - team_id = %(team_id)s - {entity_query} - {date_query} - GROUP BY event - """, - { - "team_id": team.pk, - "limit": filter.limit or 20, - **date_params, - **entity_params, - **filter.hogql_context.values, - }, - ) - - requires_flag_warning = True - - for _event, property_group_list in events_result: - for property_group in property_group_list: - if "$feature/" in property_group: - requires_flag_warning = False - break - - return requires_flag_warning diff --git a/ee/clickhouse/queries/funnels/__init__.py b/ee/clickhouse/queries/funnels/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/clickhouse/queries/funnels/funnel_correlation.py b/ee/clickhouse/queries/funnels/funnel_correlation.py deleted file mode 100644 index 0b909c84b3..0000000000 --- a/ee/clickhouse/queries/funnels/funnel_correlation.py +++ /dev/null @@ -1,971 +0,0 @@ -import dataclasses -import urllib.parse -from typing import ( - Any, - Literal, - Optional, - TypedDict, - Union, - cast, -) - -from rest_framework.exceptions import ValidationError - -from ee.clickhouse.queries.column_optimizer import EnterpriseColumnOptimizer -from ee.clickhouse.queries.groups_join_query import GroupsJoinQuery -from posthog.clickhouse.materialized_columns import get_materialized_column_for_property -from posthog.constants import ( - AUTOCAPTURE_EVENT, - TREND_FILTER_TYPE_ACTIONS, - FunnelCorrelationType, -) -from posthog.models.element.element import chain_to_elements -from posthog.models.event.util import ElementSerializer -from posthog.models.filters import Filter -from posthog.models.property.util import get_property_string_expr -from posthog.models.team import Team -from posthog.queries.funnels.utils import get_funnel_order_actor_class -from posthog.queries.insight import insight_sync_execute -from posthog.queries.person_distinct_id_query import get_team_distinct_ids_query -from posthog.queries.person_query import PersonQuery -from posthog.queries.util import alias_poe_mode_for_legacy, correct_result_for_sampling -from posthog.schema import PersonsOnEventsMode -from posthog.utils import generate_short_id - - -class EventDefinition(TypedDict): - event: str - properties: dict[str, Any] - elements: list - - -class EventOddsRatio(TypedDict): - event: str - - success_count: int - failure_count: int - - odds_ratio: float - correlation_type: Literal["success", "failure"] - - -class EventOddsRatioSerialized(TypedDict): - event: EventDefinition - - success_count: int - success_people_url: Optional[str] - - failure_count: int - failure_people_url: Optional[str] - - odds_ratio: float - correlation_type: Literal["success", "failure"] - - -class FunnelCorrelationResponse(TypedDict): - """ - The structure that the diagnose response will be returned in. 
- NOTE: TypedDict is used here to comply with existing formats from other - queries, but we could use, for example, a dataclass - """ - - events: list[EventOddsRatioSerialized] - skewed: bool - - -@dataclasses.dataclass -class EventStats: - success_count: int - failure_count: int - - -@dataclasses.dataclass -class EventContingencyTable: - """ - Represents a contingency table for a single event. Note that this isn't a - complete contingency table, but rather only includes totals for - failure/success as opposed to including the number of successes for cases - that a persons _doesn't_ visit an event. - """ - - event: str - visited: EventStats - - success_total: int - failure_total: int - - -class FunnelCorrelation: - TOTAL_IDENTIFIER = "Total_Values_In_Query" - ELEMENTS_DIVIDER = "__~~__" - AUTOCAPTURE_EVENT_TYPE = "$event_type" - MIN_PERSON_COUNT = 25 - MIN_PERSON_PERCENTAGE = 0.02 - PRIOR_COUNT = 1 - - def __init__( - self, - filter: Filter, # Used to filter people - team: Team, # Used to partition by team - base_uri: str = "/", # Used to generate absolute urls - ) -> None: - self._filter = filter - self._team = team - self._base_uri = base_uri - - if self._filter.funnel_step is None: - self._filter = self._filter.shallow_clone({"funnel_step": 1}) - # Funnel Step by default set to 1, to give us all people who entered the funnel - - # Used for generating the funnel persons cte - - filter_data = { - key: value - for key, value in self._filter.to_dict().items() - # NOTE: we want to filter anything about correlation, as the - # funnel persons endpoint does not understand or need these - # params. - if not key.startswith("funnel_correlation_") - } - # NOTE: we always use the final matching event for the recording because this - # is the the right event for both drop off and successful funnels - filter_data.update({"include_final_matching_events": self._filter.include_recordings}) - filter = Filter(data=filter_data, hogql_context=self._filter.hogql_context) - - funnel_order_actor_class = get_funnel_order_actor_class(filter) - - self._funnel_actors_generator = funnel_order_actor_class( - filter, - self._team, - # NOTE: we want to include the latest timestamp of the `target_step`, - # from this we can deduce if the person reached the end of the funnel, - # i.e. 
successful - include_timestamp=True, - # NOTE: we don't need these as we have all the information we need to - # deduce if the person was successful or not - include_preceding_timestamp=False, - include_properties=self.properties_to_include, - ) - - @property - def properties_to_include(self) -> list[str]: - props_to_include = [] - if ( - alias_poe_mode_for_legacy(self._team.person_on_events_mode) != PersonsOnEventsMode.DISABLED - and self._filter.correlation_type == FunnelCorrelationType.PROPERTIES - ): - # When dealing with properties, make sure funnel response comes with properties - # so we don't have to join on persons/groups to get these properties again - for property_name in cast(list, self._filter.correlation_property_names): - if self._filter.aggregation_group_type_index is not None: - continue # We don't support group properties on events at this time - else: - if "$all" == property_name: - return [f"person_properties"] - - possible_mat_col = get_materialized_column_for_property( - "events", "person_properties", property_name - ) - if possible_mat_col is not None and not possible_mat_col.is_nullable: - props_to_include.append(possible_mat_col.name) - else: - props_to_include.append(f"person_properties") - - return props_to_include - - def support_autocapture_elements(self) -> bool: - if ( - self._filter.correlation_type == FunnelCorrelationType.EVENT_WITH_PROPERTIES - and AUTOCAPTURE_EVENT in self._filter.correlation_event_names - ): - return True - return False - - def get_contingency_table_query(self) -> tuple[str, dict[str, Any]]: - """ - Returns a query string and params, which are used to generate the contingency table. - The query returns success and failure count for event / property values, along with total success and failure counts. 
- """ - if self._filter.correlation_type == FunnelCorrelationType.PROPERTIES: - return self.get_properties_query() - - if self._filter.correlation_type == FunnelCorrelationType.EVENT_WITH_PROPERTIES: - return self.get_event_property_query() - - return self.get_event_query() - - def get_event_query(self) -> tuple[str, dict[str, Any]]: - funnel_persons_query, funnel_persons_params = self.get_funnel_actors_cte() - - event_join_query = self._get_events_join_query() - - query = f""" - WITH - funnel_actors as ({funnel_persons_query}), - toDateTime(%(date_to)s, %(timezone)s) AS date_to, - toDateTime(%(date_from)s, %(timezone)s) AS date_from, - %(target_step)s AS target_step, - %(funnel_step_names)s as funnel_step_names - - SELECT - event.event AS name, - - -- If we have a `person.steps = target_step`, we know the person - -- reached the end of the funnel - countDistinctIf( - actors.actor_id, - actors.steps = target_step - ) AS success_count, - - -- And the converse being for failures - countDistinctIf( - actors.actor_id, - actors.steps <> target_step - ) AS failure_count - - FROM events AS event - {event_join_query} - AND event.event NOT IN %(exclude_event_names)s - GROUP BY name - - -- To get the total success/failure numbers, we do an aggregation on - -- the funnel people CTE and count distinct actor_ids - UNION ALL - - SELECT - -- We're not using WITH TOTALS because the resulting queries are - -- not runnable in Metabase - '{self.TOTAL_IDENTIFIER}' as name, - - countDistinctIf( - actors.actor_id, - actors.steps = target_step - ) AS success_count, - - countDistinctIf( - actors.actor_id, - actors.steps <> target_step - ) AS failure_count - FROM funnel_actors AS actors - """ - params = { - **funnel_persons_params, - "funnel_step_names": self._get_funnel_step_names(), - "target_step": len(self._filter.entities), - "exclude_event_names": self._filter.correlation_event_exclude_names, - } - - return query, params - - def get_event_property_query(self) -> tuple[str, dict[str, Any]]: - if not self._filter.correlation_event_names: - raise ValidationError("Event Property Correlation expects atleast one event name to run correlation on") - - funnel_persons_query, funnel_persons_params = self.get_funnel_actors_cte() - - event_join_query = self._get_events_join_query() - - if self.support_autocapture_elements(): - event_type_expression, _ = get_property_string_expr( - "events", - self.AUTOCAPTURE_EVENT_TYPE, - f"'{self.AUTOCAPTURE_EVENT_TYPE}'", - "properties", - ) - array_join_query = f""" - 'elements_chain' as prop_key, - concat({event_type_expression}, '{self.ELEMENTS_DIVIDER}', elements_chain) as prop_value, - tuple(prop_key, prop_value) as prop - """ - else: - array_join_query = f""" - arrayJoin(JSONExtractKeysAndValues(properties, 'String')) as prop - """ - - query = f""" - WITH - funnel_actors as ({funnel_persons_query}), - toDateTime(%(date_to)s, %(timezone)s) AS date_to, - toDateTime(%(date_from)s, %(timezone)s) AS date_from, - %(target_step)s AS target_step, - %(funnel_step_names)s as funnel_step_names - - SELECT concat(event_name, '::', prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) as success_count, - countDistinctIf(actor_id, steps <> target_step) as failure_count - FROM ( - SELECT - actors.actor_id as actor_id, - actors.steps as steps, - events.event as event_name, - -- Same as what we do in $all property queries - {array_join_query} - FROM events AS event - {event_join_query} - AND event.event IN %(event_names)s - ) - GROUP BY name, prop - -- Discard high 
cardinality / low hits properties - -- This removes the long tail of random properties with empty, null, or very small values - HAVING (success_count + failure_count) > 2 - AND prop.1 NOT IN %(exclude_property_names)s - - UNION ALL - -- To get the total success/failure numbers, we do an aggregation on - -- the funnel people CTE and count distinct actor_ids - SELECT - '{self.TOTAL_IDENTIFIER}' as name, - - countDistinctIf( - actors.actor_id, - actors.steps = target_step - ) AS success_count, - - countDistinctIf( - actors.actor_id, - actors.steps <> target_step - ) AS failure_count - FROM funnel_actors AS actors - """ - params = { - **funnel_persons_params, - "funnel_step_names": self._get_funnel_step_names(), - "target_step": len(self._filter.entities), - "event_names": self._filter.correlation_event_names, - "exclude_property_names": self._filter.correlation_event_exclude_property_names, - } - - return query, params - - def get_properties_query(self) -> tuple[str, dict[str, Any]]: - if not self._filter.correlation_property_names: - raise ValidationError("Property Correlation expects atleast one Property to run correlation on") - - funnel_actors_query, funnel_actors_params = self.get_funnel_actors_cte() - - person_prop_query, person_prop_params = self._get_properties_prop_clause() - - ( - aggregation_join_query, - aggregation_join_params, - ) = self._get_aggregation_join_query() - - query = f""" - WITH - funnel_actors as ({funnel_actors_query}), - %(target_step)s AS target_step - SELECT - concat(prop.1, '::', prop.2) as name, - -- We generate a unique identifier for each property value as: PropertyName::Value - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM ( - SELECT - actor_id, - funnel_actors.steps as steps, - /* - We can extract multiple property values at the same time, since we're - already querying the person table. - This gives us something like: - -------------------- - person1, steps, [property_value_0, property_value_1, property_value_2] - person2, steps, [property_value_0, property_value_1, property_value_2] - - To group by property name, we need to extract the property from the array. ArrayJoin helps us do that. 
- It transforms the above into: - - -------------------- - - person1, steps, property_value_0 - person1, steps, property_value_1 - person1, steps, property_value_2 - - person2, steps, property_value_0 - person2, steps, property_value_1 - person2, steps, property_value_2 - - To avoid clashes and clarify the values, we also zip with the property name, to generate - tuples like: (property_name, property_value), which we then group by - */ - {person_prop_query} - FROM funnel_actors - {aggregation_join_query} - - ) aggregation_target_with_props - -- Group by the tuple items: (property_name, property_value) generated by zip - GROUP BY prop.1, prop.2 - HAVING prop.1 NOT IN %(exclude_property_names)s - UNION ALL - SELECT - '{self.TOTAL_IDENTIFIER}' as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM funnel_actors - """ - params = { - **funnel_actors_params, - **person_prop_params, - **aggregation_join_params, - "target_step": len(self._filter.entities), - "property_names": self._filter.correlation_property_names, - "exclude_property_names": self._filter.correlation_property_exclude_names, - } - - return query, params - - def _get_aggregation_target_join_query(self) -> str: - if self._team.person_on_events_mode == PersonsOnEventsMode.PERSON_ID_NO_OVERRIDE_PROPERTIES_ON_EVENTS: - aggregation_person_join = f""" - JOIN funnel_actors as actors - ON event.person_id = actors.actor_id - """ - - else: - aggregation_person_join = f""" - JOIN ({get_team_distinct_ids_query(self._team.pk)}) AS pdi - ON pdi.distinct_id = events.distinct_id - - -- NOTE: I would love to right join here, so we could get total - -- success/failure numbers in one pass, but this causes out of memory - -- error mentioning issues with right filling. I'm sure there's a way - -- to do it but life's too short. - JOIN funnel_actors AS actors - ON pdi.person_id = actors.actor_id - """ - - aggregation_group_join = f""" - JOIN funnel_actors AS actors - ON actors.actor_id = events.$group_{self._filter.aggregation_group_type_index} - """ - - return ( - aggregation_group_join if self._filter.aggregation_group_type_index is not None else aggregation_person_join - ) - - def _get_events_join_query(self) -> str: - """ - This query is used to join and filter the events table corresponding to the funnel_actors CTE. - It expects the following variables to be present in the CTE expression: - - funnel_actors - - date_to - - date_from - - funnel_step_names - """ - - return f""" - {self._get_aggregation_target_join_query()} - - -- Make sure we're only looking at events before the final step, or - -- failing that, date_to - WHERE - -- add this condition in to ensure we can filter events before - -- joining funnel_actors - toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - - AND event.team_id = {self._team.pk} - - -- Add in per actor filtering on event time range. We just want - -- to include events that happened within the bounds of the - -- actor's time in the funnel. 
- AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE( - actors.final_timestamp, - actors.first_timestamp + INTERVAL {self._funnel_actors_generator._filter.funnel_window_interval} {self._funnel_actors_generator._filter.funnel_window_interval_unit_ch()}, - date_to) - -- Ensure that the event is not outside the bounds of the funnel conversion window - - -- Exclude funnel steps - AND event.event NOT IN funnel_step_names - """ - - def _get_aggregation_join_query(self): - if self._filter.aggregation_group_type_index is None: - person_query, person_query_params = PersonQuery( - self._filter, - self._team.pk, - EnterpriseColumnOptimizer(self._filter, self._team.pk), - ).get_query() - - return ( - f""" - JOIN ({person_query}) person - ON person.id = funnel_actors.actor_id - """, - person_query_params, - ) - else: - return GroupsJoinQuery(self._filter, self._team.pk, join_key="funnel_actors.actor_id").get_join_query() - - def _get_properties_prop_clause(self): - if ( - alias_poe_mode_for_legacy(self._team.person_on_events_mode) != PersonsOnEventsMode.DISABLED - and self._filter.aggregation_group_type_index is None - ): - aggregation_properties_alias = "person_properties" - else: - group_properties_field = f"groups_{self._filter.aggregation_group_type_index}.group_properties_{self._filter.aggregation_group_type_index}" - aggregation_properties_alias = ( - PersonQuery.PERSON_PROPERTIES_ALIAS - if self._filter.aggregation_group_type_index is None - else group_properties_field - ) - - if "$all" in cast(list, self._filter.correlation_property_names): - return ( - f""" - arrayJoin(JSONExtractKeysAndValues({aggregation_properties_alias}, 'String')) as prop - """, - {}, - ) - else: - person_property_expressions = [] - person_property_params = {} - for index, property_name in enumerate(cast(list, self._filter.correlation_property_names)): - param_name = f"property_name_{index}" - if self._filter.aggregation_group_type_index is not None: - expression, _ = get_property_string_expr( - "groups" - if alias_poe_mode_for_legacy(self._team.person_on_events_mode) == PersonsOnEventsMode.DISABLED - else "events", - property_name, - f"%({param_name})s", - aggregation_properties_alias, - materialised_table_column=aggregation_properties_alias, - ) - else: - expression, _ = get_property_string_expr( - "person" - if alias_poe_mode_for_legacy(self._team.person_on_events_mode) == PersonsOnEventsMode.DISABLED - else "events", - property_name, - f"%({param_name})s", - aggregation_properties_alias, - materialised_table_column=( - aggregation_properties_alias - if alias_poe_mode_for_legacy(self._team.person_on_events_mode) - != PersonsOnEventsMode.DISABLED - else "properties" - ), - ) - person_property_params[param_name] = property_name - person_property_expressions.append(expression) - - return ( - f""" - arrayJoin(arrayZip( - %(property_names)s, - [{','.join(person_property_expressions)}] - )) as prop - """, - person_property_params, - ) - - def _get_funnel_step_names(self): - events: set[Union[int, str]] = set() - for entity in self._filter.entities: - if entity.type == TREND_FILTER_TYPE_ACTIONS: - action = entity.get_action() - events.update([x for x in action.get_step_events() if x]) - elif entity.id is not None: - events.add(entity.id) - - return sorted(events) - - def _run(self) -> tuple[list[EventOddsRatio], bool]: - """ - Run the diagnose query. 
- - Funnel Correlation queries take as input the same as the funnel query, - and return the correlation of person events with a person successfully - getting to the end of the funnel. We use Odds Ratios as the correlation - metric. See https://en.wikipedia.org/wiki/Odds_ratio for more details. - - Roughly speaking, to calculate the odds ratio, we build a contingency - table https://en.wikipedia.org/wiki/Contingency_table for each - dimension, then calculate the odds ratio for each. - - For example, take for simplicity the cohort of all people, and the - success criteria of having a "signed up" event. First we would build a - contingency table like: - - | | success | failure | total | - | -----------------: | :-----: | :-----: | :---: | - | watched video | 5 | 1 | 6 | - | didn't watch video | 2 | 10 | 12 | - - - Then the odds that a person signs up given they watched the video is 5 / - 1. - - And the odds that a person signs up given they didn't watch the video is - 2 / 10. - - So we say the odds ratio is 5 / 1 over 2 / 10 = 25. The further away the - odds ratio is from 1, the greater the correlation. - - Requirements: - - - Initially we only need to consider the names of events that a cohort - person has emitted. So we explicitly are not interested in e.g. - correlating properties, although this will be a follow-up. - - Non-functional requirements: - - - there can be perhaps millions of people in a cohort, so we should - consider this when writing the algorithm. e.g. we should probably - avoid pulling all people across the wire. - - there can be an order of magnitude more events than people, so we - should avoid pulling all events across the wire. - - there may be a large but not huge number of distinct events, let's say - 100 different names for events. We should avoid n+1 queries for the - event names dimension. - - Contingency tables are something we can pull out of the db, so we can - have a query that: - - 1. filters people by the cohort criteria - 2. groups these people by the success criteria - 3. groups people by our criterion with which we want to test - correlation, e.g. "watched video" - - """ - self._filter.team = self._team - - ( - event_contingency_tables, - success_total, - failure_total, - ) = self.get_partial_event_contingency_tables() - - success_total = int(correct_result_for_sampling(success_total, self._filter.sampling_factor)) - failure_total = int(correct_result_for_sampling(failure_total, self._filter.sampling_factor)) - - if not success_total or not failure_total: - return [], True - - skewed_totals = False - - # If the ratio is greater than 1:10, then we have a skewed result, so we should - # warn the user.
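(Editor's illustration, not part of the original module: a minimal sketch of the odds-ratio arithmetic described in the docstring above, using the docstring's contingency table. The prior count of 1 is an assumption for the example; the real code passes FunnelCorrelation.PRIOR_COUNT, defined elsewhere in the class, into get_entity_odds_ratio below.)

# Contingency table from the docstring: "watched video" vs signing up.
watched_success, watched_failure = 5, 1          # did the event, converted / didn't convert
not_watched_success, not_watched_failure = 2, 10

success_total = watched_success + not_watched_success  # 7 people converted overall
failure_total = watched_failure + not_watched_failure  # 11 people did not convert

# Raw odds ratio: (5 / 1) over (2 / 10) = 25, as in the docstring.
raw_odds_ratio = (watched_success * not_watched_failure) / (watched_failure * not_watched_success)
assert raw_odds_ratio == 25.0

# Smoothed variant mirroring get_entity_odds_ratio (defined later in this file),
# with an assumed prior of 1 added to every cell to avoid division by zero.
prior = 1
smoothed_odds_ratio = ((watched_success + prior) * (failure_total - watched_failure + prior)) / (
    (success_total - watched_success + prior) * (watched_failure + prior)
)
assert smoothed_odds_ratio == 66 / 6  # 11.0 -- the prior damps ratios driven by tiny cells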
- if success_total / failure_total > 10 or failure_total / success_total > 10: - skewed_totals = True - - odds_ratios = [ - get_entity_odds_ratio(event_stats, FunnelCorrelation.PRIOR_COUNT) - for event_stats in event_contingency_tables - if not FunnelCorrelation.are_results_insignificant(event_stats) - ] - - positively_correlated_events = sorted( - [odds_ratio for odds_ratio in odds_ratios if odds_ratio["correlation_type"] == "success"], - key=lambda x: x["odds_ratio"], - reverse=True, - ) - - negatively_correlated_events = sorted( - [odds_ratio for odds_ratio in odds_ratios if odds_ratio["correlation_type"] == "failure"], - key=lambda x: x["odds_ratio"], - reverse=False, - ) - - # Return the top ten positively correlated events, and top ten negatively correlated events - events = positively_correlated_events[:10] + negatively_correlated_events[:10] - return events, skewed_totals - - def construct_people_url( - self, - success: bool, - event_definition: EventDefinition, - cache_invalidation_key: str, - ) -> Optional[str]: - """ - Given an event_definition and success/failure flag, returns a url that - can be used to GET the associated people for the event/success pair. The - primary purpose of this is to reduce the risk of clients of the API - fetching incorrect people, given an event definition. - """ - if not self._filter.correlation_type or self._filter.correlation_type == FunnelCorrelationType.EVENTS: - return self.construct_event_correlation_people_url( - success=success, - event_definition=event_definition, - cache_invalidation_key=cache_invalidation_key, - ) - - elif self._filter.correlation_type == FunnelCorrelationType.EVENT_WITH_PROPERTIES: - return self.construct_event_with_properties_people_url( - success=success, - event_definition=event_definition, - cache_invalidation_key=cache_invalidation_key, - ) - - elif self._filter.correlation_type == FunnelCorrelationType.PROPERTIES: - return self.construct_person_properties_people_url( - success=success, - event_definition=event_definition, - cache_invalidation_key=cache_invalidation_key, - ) - - return None - - def construct_event_correlation_people_url( - self, - success: bool, - event_definition: EventDefinition, - cache_invalidation_key: str, - ) -> str: - # NOTE: we need to convert certain params to strings.
I don't think this - # class should need to know these details, but shallow_clone is - # expecting the values as they are serialized in the url - # TODO: remove url serialization details from this class, it likely - # belongs in the application layer, or perhaps `FunnelCorrelationPeople` - params = self._filter.shallow_clone( - { - "funnel_correlation_person_converted": "true" if success else "false", - "funnel_correlation_person_entity": { - "id": event_definition["event"], - "type": "events", - }, - } - ).to_params() - return f"{self._base_uri}api/person/funnel/correlation/?{urllib.parse.urlencode(params)}&cache_invalidation_key={cache_invalidation_key}" - - def construct_event_with_properties_people_url( - self, - success: bool, - event_definition: EventDefinition, - cache_invalidation_key: str, - ) -> str: - if self.support_autocapture_elements(): - # If we have an $autocapture event, we need to special case the - # url by converting the `elements` chain into an `Action` - event_name, _, _ = event_definition["event"].split("::") - elements = event_definition["elements"] - first_element = elements[0] - elements_as_action = { - "tag_name": first_element["tag_name"], - "href": first_element["href"], - "text": first_element["text"], - "selector": build_selector(elements), - } - params = self._filter.shallow_clone( - { - "funnel_correlation_person_converted": "true" if success else "false", - "funnel_correlation_person_entity": { - "id": event_name, - "type": "events", - "properties": [ - { - "key": property_key, - "value": [property_value], - "type": "element", - "operator": "exact", - } - for property_key, property_value in elements_as_action.items() - if property_value is not None - ], - }, - } - ).to_params() - return f"{self._base_uri}api/person/funnel/correlation/?{urllib.parse.urlencode(params)}&cache_invalidation_key={cache_invalidation_key}" - - event_name, property_name, property_value = event_definition["event"].split("::") - params = self._filter.shallow_clone( - { - "funnel_correlation_person_converted": "true" if success else "false", - "funnel_correlation_person_entity": { - "id": event_name, - "type": "events", - "properties": [ - { - "key": property_name, - "value": property_value, - "type": "event", - "operator": "exact", - } - ], - }, - } - ).to_params() - return f"{self._base_uri}api/person/funnel/correlation/?{urllib.parse.urlencode(params)}" - - def construct_person_properties_people_url( - self, - success: bool, - event_definition: EventDefinition, - cache_invalidation_key: str, - ) -> str: - # NOTE: for property correlations, we just use the regular funnel - # persons endpoint, with the breakdown value set, and we assume that - # event.event will be of the format "{property_name}::{property_value}" - property_name, property_value = event_definition["event"].split("::") - prop_type = "group" if self._filter.aggregation_group_type_index else "person" - params = self._filter.shallow_clone( - { - "funnel_correlation_person_converted": "true" if success else "false", - "funnel_correlation_property_values": [ - { - "key": property_name, - "value": property_value, - "type": prop_type, - "operator": "exact", - "group_type_index": self._filter.aggregation_group_type_index, - } - ], - } - ).to_params() - return f"{self._base_uri}api/person/funnel/correlation?{urllib.parse.urlencode(params)}&cache_invalidation_key={cache_invalidation_key}" - - def format_results(self, results: tuple[list[EventOddsRatio], bool]) -> FunnelCorrelationResponse: - odds_ratios, skewed_totals = 
results - return { - "events": [self.serialize_event_odds_ratio(odds_ratio=odds_ratio) for odds_ratio in odds_ratios], - "skewed": skewed_totals, - } - - def run(self) -> FunnelCorrelationResponse: - if not self._filter.entities: - return FunnelCorrelationResponse(events=[], skewed=False) - - return self.format_results(self._run()) - - def get_partial_event_contingency_tables(self) -> tuple[list[EventContingencyTable], int, int]: - """ - For each event performed by a person who started going through the funnel, get stats - for how many of these users were successful and how many were unsuccessful. - - It's a partial table as it doesn't include numbers of the negation of the - event, but does include the total success/failure numbers, which is enough - for us to calculate the odds ratio. - """ - - query, params = self.get_contingency_table_query() - results_with_total = insight_sync_execute( - query, - {**params, **self._filter.hogql_context.values}, - query_type="funnel_correlation", - filter=self._filter, - team_id=self._team.pk, - ) - - # Get the total success/failure counts from the results - results = [result for result in results_with_total if result[0] != self.TOTAL_IDENTIFIER] - _, success_total, failure_total = next( - result for result in results_with_total if result[0] == self.TOTAL_IDENTIFIER - ) - - # Add a little structure, and keep it close to the query definition so it's - # obvious what's going on with result indices. - return ( - [ - EventContingencyTable( - event=result[0], - visited=EventStats(success_count=result[1], failure_count=result[2]), - success_total=success_total, - failure_total=failure_total, - ) - for result in results - ], - success_total, - failure_total, - ) - - def get_funnel_actors_cte(self) -> tuple[str, dict[str, Any]]: - extra_fields = ["steps", "final_timestamp", "first_timestamp"] - - for prop in self.properties_to_include: - extra_fields.append(prop) - - return self._funnel_actors_generator.actor_query(limit_actors=False, extra_fields=extra_fields) - - @staticmethod - def are_results_insignificant(event_contingency_table: EventContingencyTable) -> bool: - """ - Check if the results are insignificant, i.e. if too few of the funnel's people performed the event - (fewer than the smaller of MIN_PERSON_COUNT and MIN_PERSON_PERCENTAGE of the total) for the - odds ratio to be meaningful - """ - - total_count = event_contingency_table.success_total + event_contingency_table.failure_total - - if event_contingency_table.visited.success_count + event_contingency_table.visited.failure_count < min( - FunnelCorrelation.MIN_PERSON_COUNT, - FunnelCorrelation.MIN_PERSON_PERCENTAGE * total_count, - ): - return True - - return False - - def serialize_event_odds_ratio(self, odds_ratio: EventOddsRatio) -> EventOddsRatioSerialized: - event_definition = self.serialize_event_with_property(event=odds_ratio["event"]) - cache_invalidation_key = generate_short_id() - return { - "success_count": odds_ratio["success_count"], - "success_people_url": self.construct_people_url( - success=True, - event_definition=event_definition, - cache_invalidation_key=cache_invalidation_key, - ), - "failure_count": odds_ratio["failure_count"], - "failure_people_url": self.construct_people_url( - success=False, - event_definition=event_definition, - cache_invalidation_key=cache_invalidation_key, - ), - "odds_ratio": odds_ratio["odds_ratio"], - "correlation_type": odds_ratio["correlation_type"], - "event": event_definition, - } - - def serialize_event_with_property(self, event: str) -> EventDefinition: - """ - Format the event name for display.
- """ - if not self.support_autocapture_elements(): - return EventDefinition(event=event, properties={}, elements=[]) - - event_name, property_name, property_value = event.split("::") - if event_name == AUTOCAPTURE_EVENT and property_name == "elements_chain": - event_type, elements_chain = property_value.split(self.ELEMENTS_DIVIDER) - return EventDefinition( - event=event, - properties={self.AUTOCAPTURE_EVENT_TYPE: event_type}, - elements=cast( - list, - ElementSerializer(chain_to_elements(elements_chain), many=True).data, - ), - ) - - return EventDefinition(event=event, properties={}, elements=[]) - - -def get_entity_odds_ratio(event_contingency_table: EventContingencyTable, prior_counts: int) -> EventOddsRatio: - # Add 1 to all values to prevent divide by zero errors, and introduce a [prior](https://en.wikipedia.org/wiki/Prior_probability) - odds_ratio = ( - (event_contingency_table.visited.success_count + prior_counts) - * (event_contingency_table.failure_total - event_contingency_table.visited.failure_count + prior_counts) - ) / ( - (event_contingency_table.success_total - event_contingency_table.visited.success_count + prior_counts) - * (event_contingency_table.visited.failure_count + prior_counts) - ) - - return EventOddsRatio( - event=event_contingency_table.event, - success_count=event_contingency_table.visited.success_count, - failure_count=event_contingency_table.visited.failure_count, - odds_ratio=odds_ratio, - correlation_type="success" if odds_ratio > 1 else "failure", - ) - - -def build_selector(elements: list[dict[str, Any]]) -> str: - # build a CSS select given an "elements_chain" - # NOTE: my source of what this should be doing is - # https://github.com/PostHog/posthog/blob/cc054930a47fb59940531e99a856add49a348ee5/frontend/src/scenes/events/createActionFromEvent.tsx#L36:L36 - # - def element_to_selector(element: dict[str, Any]) -> str: - if attr_id := element.get("attr_id"): - return f'[id="{attr_id}"]' - - return element["tag_name"] - - return " > ".join([element_to_selector(element) for element in elements]) diff --git a/ee/clickhouse/queries/funnels/funnel_correlation_persons.py b/ee/clickhouse/queries/funnels/funnel_correlation_persons.py deleted file mode 100644 index b02a8b8e9b..0000000000 --- a/ee/clickhouse/queries/funnels/funnel_correlation_persons.py +++ /dev/null @@ -1,211 +0,0 @@ -from typing import Optional, Union - -from django.db.models.query import QuerySet -from rest_framework.exceptions import ValidationError - -from ee.clickhouse.queries.funnels.funnel_correlation import FunnelCorrelation -from posthog.constants import ( - FUNNEL_CORRELATION_PERSON_LIMIT, - FunnelCorrelationType, - PropertyOperatorType, -) -from posthog.models import Person -from posthog.models.entity import Entity -from posthog.models.filters.filter import Filter -from posthog.models.filters.mixins.utils import cached_property -from posthog.models.group import Group -from posthog.models.team import Team -from posthog.queries.actor_base_query import ( - ActorBaseQuery, - SerializedGroup, - SerializedPerson, -) -from posthog.queries.funnels.funnel_event_query import FunnelEventQuery -from posthog.queries.util import get_person_properties_mode - - -class FunnelCorrelationActors(ActorBaseQuery): - _filter: Filter - QUERY_TYPE = "funnel_correlation_actors" - - def __init__(self, filter: Filter, team: Team, base_uri: str = "/", **kwargs) -> None: - self._base_uri = base_uri - self._filter = filter - self._team = team - - if not self._filter.correlation_person_limit: - self._filter = 
self._filter.shallow_clone({FUNNEL_CORRELATION_PERSON_LIMIT: 100}) - - @cached_property - def aggregation_group_type_index(self): - return self._filter.aggregation_group_type_index - - def actor_query(self, limit_actors: Optional[bool] = True): - if self._filter.correlation_type == FunnelCorrelationType.PROPERTIES: - return _FunnelPropertyCorrelationActors(self._filter, self._team, self._base_uri).actor_query( - limit_actors=limit_actors - ) - else: - return _FunnelEventsCorrelationActors(self._filter, self._team, self._base_uri).actor_query( - limit_actors=limit_actors - ) - - def get_actors( - self, - ) -> tuple[ - Union[QuerySet[Person], QuerySet[Group]], - Union[list[SerializedGroup], list[SerializedPerson]], - int, - ]: - if self._filter.correlation_type == FunnelCorrelationType.PROPERTIES: - return _FunnelPropertyCorrelationActors(self._filter, self._team, self._base_uri).get_actors() - else: - return _FunnelEventsCorrelationActors(self._filter, self._team, self._base_uri).get_actors() - - -class _FunnelEventsCorrelationActors(ActorBaseQuery): - _filter: Filter - QUERY_TYPE = "funnel_events_correlation_actors" - - def __init__(self, filter: Filter, team: Team, base_uri: str = "/") -> None: - self._funnel_correlation = FunnelCorrelation(filter, team, base_uri=base_uri) - super().__init__(team, filter) - - @cached_property - def aggregation_group_type_index(self): - return self._filter.aggregation_group_type_index - - def actor_query(self, limit_actors: Optional[bool] = True): - if not self._filter.correlation_person_entity: - raise ValidationError("No entity for persons specified") - - assert isinstance(self._filter.correlation_person_entity, Entity) - - ( - funnel_persons_query, - funnel_persons_params, - ) = self._funnel_correlation.get_funnel_actors_cte() - - prop_filters = self._filter.correlation_person_entity.property_groups - - # TRICKY: We use "events" as an alias here while the eventquery uses "e" by default - event_query = FunnelEventQuery(self._filter, self._team) - event_query.EVENT_TABLE_ALIAS = "events" - - prop_query, prop_params = event_query._get_prop_groups( - prop_filters, - person_properties_mode=get_person_properties_mode(self._team), - person_id_joined_alias=event_query._get_person_id_alias(self._team.person_on_events_mode), - ) - - conversion_filter = ( - f'AND actors.steps {"=" if self._filter.correlation_persons_converted else "<>"} target_step' - if self._filter.correlation_persons_converted is not None - else "" - ) - - event_join_query = self._funnel_correlation._get_events_join_query() - - recording_event_select_statement = ( - ", any(actors.matching_events) AS matching_events" if self._filter.include_recordings else "" - ) - - query = f""" - WITH - funnel_actors as ({funnel_persons_query}), - toDateTime(%(date_to)s, %(timezone)s) AS date_to, - toDateTime(%(date_from)s, %(timezone)s) AS date_from, - %(target_step)s AS target_step, - %(funnel_step_names)s as funnel_step_names - SELECT - actors.actor_id AS actor_id - {recording_event_select_statement} - FROM events AS event - {event_join_query} - AND event.event = %(target_event)s - {conversion_filter} - {prop_query} - GROUP BY actor_id - ORDER BY actor_id - {"LIMIT %(limit)s" if limit_actors else ""} - {"OFFSET %(offset)s" if limit_actors else ""} - """ - - params = { - **funnel_persons_params, - **prop_params, - "target_event": self._filter.correlation_person_entity.id, - "funnel_step_names": [entity.id for entity in self._filter.events], - "target_step": len(self._filter.entities), - "limit": 
self._filter.correlation_person_limit, - "offset": self._filter.correlation_person_offset, - } - - return query, params - - -class _FunnelPropertyCorrelationActors(ActorBaseQuery): - _filter: Filter - QUERY_TYPE = "funnel_property_correlation_actors" - - def __init__(self, filter: Filter, team: Team, base_uri: str = "/") -> None: - # Filtering on persons / groups properties can be pushed down to funnel_actors CTE - new_correlation_filter = filter.shallow_clone( - { - "properties": filter.property_groups.combine_properties( - PropertyOperatorType.AND, filter.correlation_property_values or [] - ).to_dict() - } - ) - self._funnel_correlation = FunnelCorrelation(new_correlation_filter, team, base_uri=base_uri) - super().__init__(team, filter) - - @cached_property - def aggregation_group_type_index(self): - return self._filter.aggregation_group_type_index - - def actor_query( - self, - limit_actors: Optional[bool] = True, - extra_fields: Optional[list[str]] = None, - ): - if not self._filter.correlation_property_values: - raise ValidationError("Property Correlation expects atleast one Property to get persons for") - - ( - funnel_persons_query, - funnel_persons_params, - ) = self._funnel_correlation.get_funnel_actors_cte() - - conversion_filter = ( - f'funnel_actors.steps {"=" if self._filter.correlation_persons_converted else "<>"} target_step' - if self._filter.correlation_persons_converted is not None - else "" - ) - - recording_event_select_statement = ( - ", any(funnel_actors.matching_events) AS matching_events" if self._filter.include_recordings else "" - ) - - query = f""" - WITH - funnel_actors AS ({funnel_persons_query}), - %(target_step)s AS target_step - SELECT - funnel_actors.actor_id AS actor_id - {recording_event_select_statement} - FROM funnel_actors - WHERE {conversion_filter} - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - {"LIMIT %(limit)s" if limit_actors else ""} - {"OFFSET %(offset)s" if limit_actors else ""} - """ - params = { - **funnel_persons_params, - "target_step": len(self._filter.entities), - "limit": self._filter.correlation_person_limit, - "offset": self._filter.correlation_person_offset, - } - - return query, params diff --git a/ee/clickhouse/queries/funnels/test/__init__.py b/ee/clickhouse/queries/funnels/test/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel.ambr b/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel.ambr deleted file mode 100644 index 10700d192c..0000000000 --- a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel.ambr +++ /dev/null @@ -1,3540 +0,0 @@ -# serializer version: 1 -# name: TestFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group0_properties, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (NOT has([''], "$group_0")) - AND notEmpty(e.person_id) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: 
TestFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events.1 - ''' - - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - countIf(steps = 3) step_3, - avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - avg(step_2_average_conversion_time_inner) step_2_average_conversion_time, - median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - median(step_2_median_conversion_time_inner) step_2_median_conversion_time, - prop - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - e.person_id as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 
23:59:59', 'UTC') - AND (NOT has([''], "$group_0")) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - GROUP BY prop - ''' -# --- -# name: TestFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group0_properties, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (NOT has([''], "$group_0")) - AND notEmpty(e.person_id) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2.1 - ''' - - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - countIf(steps = 3) step_3, - avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - avg(step_2_average_conversion_time_inner) step_2_average_conversion_time, - median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - median(step_2_median_conversion_time_inner) step_2_median_conversion_time, - prop - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, 
- latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(notEmpty(overrides.distinct_id), overrides.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (NOT has([''], "$group_0")) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - GROUP BY prop - ''' -# --- -# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.1 - ''' - - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - countIf(steps = 3) step_3, - avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - avg(step_2_average_conversion_time_inner) step_2_average_conversion_time, - 
median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - median(step_2_median_conversion_time_inner) step_2_median_conversion_time, - prop - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - 
GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - GROUP BY prop - ''' -# --- -# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.2 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.3 - ''' - - SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - 
if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('finance')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.4 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.5 - ''' - - SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 
DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - WHERE steps IN [2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('finance')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.6 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM 
groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.7 - ''' - - SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= 
toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('technology')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.8 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.9 - ''' - - SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over 
(PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - WHERE steps IN [2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('technology')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestStrictFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group0_properties, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (NOT has([''], "$group_0")) - AND notEmpty(e.person_id) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestStrictFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events.1 - ''' - - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - countIf(steps = 3) step_3, - 
avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - avg(step_2_average_conversion_time_inner) step_2_average_conversion_time, - median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - median(step_2_median_conversion_time_inner) step_2_median_conversion_time, - prop - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - e.person_id as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (NOT has([''], "$group_0")) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - 
steps, - prop - HAVING steps = max(max_steps)) - GROUP BY prop - ''' -# --- -# name: TestStrictFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group0_properties, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (NOT has([''], "$group_0")) - AND notEmpty(e.person_id) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestStrictFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2.1 - ''' - - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - countIf(steps = 3) step_3, - avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - avg(step_2_average_conversion_time_inner) step_2_average_conversion_time, - median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - median(step_2_median_conversion_time_inner) step_2_median_conversion_time, - prop - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - 
step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(notEmpty(overrides.distinct_id), overrides.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (NOT has([''], "$group_0")) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - GROUP BY prop - ''' -# --- -# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.1 - ''' - - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - countIf(steps = 3) step_3, - avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - avg(step_2_average_conversion_time_inner) step_2_average_conversion_time, - median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - median(step_2_median_conversion_time_inner) step_2_median_conversion_time, - prop - FROM - (SELECT 
aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= 
toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - GROUP BY prop - ''' -# --- -# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.2 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.3 - ''' - - SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner, - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time, - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN 2 PRECEDING AND 2 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE 
team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (1=1) ))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('finance')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.4 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.5 - ''' - - SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner, - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time, - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN 2 PRECEDING AND 2 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - 
FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (1=1) ))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - WHERE steps IN [2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('finance')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.6 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.7 - ''' - - SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner, - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time, - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, 
- if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN 2 PRECEDING AND 2 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (1=1) ))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('technology')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.8 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: 
TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.9 - ''' - - SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner, - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time, - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps, - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN 2 PRECEDING AND 2 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (1=1) ))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - WHERE steps IN [2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('technology')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - 
max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group0_properties, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (NOT has([''], "$group_0")) - AND notEmpty(e.person_id) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events.1 - ''' - - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - countIf(steps = 3) step_3, - avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - avg(step_2_average_conversion_time_inner) step_2_average_conversion_time, - median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - median(step_2_median_conversion_time_inner) step_2_median_conversion_time, - prop - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - e.person_id as person_id, - if(event = 
'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (NOT has([''], "$group_0")) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - GROUP BY prop - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group0_properties, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (NOT has([''], "$group_0")) - AND notEmpty(e.person_id) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2.1 - ''' - - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - countIf(steps = 3) step_3, - avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - avg(step_2_average_conversion_time_inner) step_2_average_conversion_time, - median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - median(step_2_median_conversion_time_inner) step_2_median_conversion_time, - prop - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - 
step_1_conversion_time, - step_2_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(notEmpty(overrides.distinct_id), overrides.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (NOT has([''], "$group_0")) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - GROUP BY prop - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group - ''' - - SELECT 
replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.1 - ''' - - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - countIf(steps = 3) step_3, - avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - avg(step_2_average_conversion_time_inner) step_2_average_conversion_time, - median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - median(step_2_median_conversion_time_inner) step_2_median_conversion_time, - prop - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 
'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - GROUP BY prop - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.10 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.11 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.12 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= 
toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.13 - ''' - - SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner, - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time, - prop - FROM - (SELECT *, - arraySort([latest_0,latest_1,latest_2]) as event_times, - arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , - arraySort([latest_0,latest_1,latest_2]) as conversion_times, - if(isNotNull(conversion_times[2]) - AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, - if(isNotNull(conversion_times[3]) - AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - 
AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))) - WHERE step_0 = 1 - UNION ALL SELECT *, - arraySort([latest_0,latest_1,latest_2]) as event_times, - arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , - arraySort([latest_0,latest_1,latest_2]) as conversion_times, - if(isNotNull(conversion_times[2]) - AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, - if(isNotNull(conversion_times[3]) - AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'play movie', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'buy', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'sign up', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))) - WHERE step_0 = 1 - UNION ALL SELECT *, - arraySort([latest_0,latest_1,latest_2]) as event_times, - arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , - arraySort([latest_0,latest_1,latest_2]) as conversion_times, - if(isNotNull(conversion_times[2]) - AND conversion_times[2] 
<= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, - if(isNotNull(conversion_times[3]) - AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'buy', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'sign up', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'play movie', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('technology')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.14 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 
'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.15 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.16 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.17 - ''' - - SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner, - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time, - prop - FROM - (SELECT *, - arraySort([latest_0,latest_1,latest_2]) as event_times, - arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , - arraySort([latest_0,latest_1,latest_2]) as conversion_times, - if(isNotNull(conversion_times[2]) - AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, - if(isNotNull(conversion_times[3]) - AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as 
aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))) - WHERE step_0 = 1 - UNION ALL SELECT *, - arraySort([latest_0,latest_1,latest_2]) as event_times, - arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , - arraySort([latest_0,latest_1,latest_2]) as conversion_times, - if(isNotNull(conversion_times[2]) - AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, - if(isNotNull(conversion_times[3]) - AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'play movie', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'buy', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'sign up', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - 
(SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))) - WHERE step_0 = 1 - UNION ALL SELECT *, - arraySort([latest_0,latest_1,latest_2]) as event_times, - arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , - arraySort([latest_0,latest_1,latest_2]) as conversion_times, - if(isNotNull(conversion_times[2]) - AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, - if(isNotNull(conversion_times[3]) - AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'buy', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'sign up', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'play movie', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - 
FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - WHERE steps IN [2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('technology')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.2 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.3 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.4 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.5 - ''' - - SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner, - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY 
aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time, - prop - FROM - (SELECT *, - arraySort([latest_0,latest_1,latest_2]) as event_times, - arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , - arraySort([latest_0,latest_1,latest_2]) as conversion_times, - if(isNotNull(conversion_times[2]) - AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, - if(isNotNull(conversion_times[3]) - AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))) - WHERE step_0 = 1 - UNION ALL SELECT *, - arraySort([latest_0,latest_1,latest_2]) as event_times, - arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , - arraySort([latest_0,latest_1,latest_2]) as conversion_times, - if(isNotNull(conversion_times[2]) - AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) 
step_1_conversion_time, - if(isNotNull(conversion_times[3]) - AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'play movie', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'buy', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'sign up', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))) - WHERE step_0 = 1 - UNION ALL SELECT *, - arraySort([latest_0,latest_1,latest_2]) as event_times, - arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , - arraySort([latest_0,latest_1,latest_2]) as conversion_times, - if(isNotNull(conversion_times[2]) - AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, - if(isNotNull(conversion_times[3]) - AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - 
if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'buy', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'sign up', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'play movie', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('finance')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.6 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.7 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= 
toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.8 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.9 - ''' - - SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner, - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time, - prop - FROM - (SELECT *, - arraySort([latest_0,latest_1,latest_2]) as event_times, - arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , - arraySort([latest_0,latest_1,latest_2]) as conversion_times, - if(isNotNull(conversion_times[2]) - AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, - if(isNotNull(conversion_times[3]) - AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - 
(SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))) - WHERE step_0 = 1 - UNION ALL SELECT *, - arraySort([latest_0,latest_1,latest_2]) as event_times, - arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , - arraySort([latest_0,latest_1,latest_2]) as conversion_times, - if(isNotNull(conversion_times[2]) - AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, - if(isNotNull(conversion_times[3]) - AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'play movie', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'buy', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'sign up', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE 
team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))) - WHERE step_0 = 1 - UNION ALL SELECT *, - arraySort([latest_0,latest_1,latest_2]) as event_times, - arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 7 DAY, 1, 0),if(latest_0 < latest_2 AND latest_2 <= latest_0 + INTERVAL 7 DAY, 1, 0), 1]) AS steps , - arraySort([latest_0,latest_1,latest_2]) as conversion_times, - if(isNotNull(conversion_times[2]) - AND conversion_times[2] <= conversion_times[1] + INTERVAL 7 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time, - if(isNotNull(conversion_times[3]) - AND conversion_times[3] <= conversion_times[2] + INTERVAL 7 DAY, dateDiff('second', conversion_times[2], conversion_times[3]), NULL) step_2_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'buy', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'sign up', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'play movie', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - WHERE steps IN [2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('finance')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- 
diff --git a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlation.ambr b/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlation.ambr deleted file mode 100644 index fcf2044085..0000000000 --- a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlation.ambr +++ /dev/null @@ -1,4902 +0,0 @@ -# serializer version: 1 -# name: TestClickhouseFunnelCorrelation.test_action_events_are_excluded_from_correlations - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(((event = 'user signed up' - AND (has(['val'], replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', ''))))) , 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(((event = 'paid' - AND (has(['val'], replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', ''))))) , 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up', 'user signed up', 'paid'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up', 'user signed up', 'paid'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2020-01-14 23:59:59', 'UTC') AS date_to, - toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['paid', 'user signed up'] as funnel_step_names - SELECT event.event AS name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> 
target_step) AS failure_count - FROM events AS event - JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON pdi.distinct_id = events.distinct_id - JOIN funnel_actors AS actors ON pdi.person_id = actors.actor_id - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event NOT IN [] - GROUP BY name - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count - FROM funnel_actors AS actors - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE 
steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM - (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(arrayZip(['$browser'], [replaceRegexpAll(JSONExtractRaw(person_props, '$browser'), '^"|"$', '')])) as prop - FROM funnel_actors - JOIN - (SELECT id, - argMax(properties, version) as person_props - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = funnel_actors.actor_id) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM funnel_actors - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.1 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (has(['Positive'], replaceRegexpAll(JSONExtractRaw(properties, '$browser'), '^"|"$', ''))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (has(['Positive'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$browser'), 
'^"|"$', ''))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps = target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.2 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (has(['Positive'], replaceRegexpAll(JSONExtractRaw(properties, '$browser'), '^"|"$', ''))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (has(['Positive'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$browser'), '^"|"$', ''))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 
'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps <> target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.3 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (has(['Negative'], replaceRegexpAll(JSONExtractRaw(properties, '$browser'), '^"|"$', ''))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (has(['Negative'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$browser'), '^"|"$', ''))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS 
max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps = target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.4 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (has(['Negative'], replaceRegexpAll(JSONExtractRaw(properties, '$browser'), '^"|"$', ''))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (has(['Negative'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$browser'), '^"|"$', ''))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps <> target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - 
''' -# --- -# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties_materialized - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM - (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(arrayZip(['$browser'], ["pmat_$browser"])) as prop - FROM funnel_actors - JOIN - (SELECT id, - argMax(pmat_$browser, version) as pmat_$browser - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = funnel_actors.actor_id) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM funnel_actors - ''' -# --- -# name: 
TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties_materialized.1 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (has(['Positive'], "pmat_$browser")) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (has(['Positive'], argMax(person."pmat_$browser", version))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps = target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties_materialized.2 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - 
median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (has(['Positive'], "pmat_$browser")) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (has(['Positive'], argMax(person."pmat_$browser", version))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps <> target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties_materialized.3 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, 
- latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (has(['Negative'], "pmat_$browser")) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (has(['Negative'], argMax(person."pmat_$browser", version))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps = target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties_materialized.4 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - 
min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (has(['Negative'], "pmat_$browser")) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (has(['Negative'], argMax(person."pmat_$browser", version))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps <> target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_event_properties_and_groups - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_1" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, 
timestamp, null) as latest_1 - FROM events e - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2020-01-14 23:59:59', 'UTC') AS date_to, - toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['paid', 'user signed up'] as funnel_step_names - SELECT concat(event_name, '::', prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) as success_count, - countDistinctIf(actor_id, steps <> target_step) as failure_count - FROM - (SELECT actors.actor_id as actor_id, - actors.steps as steps, - events.event as event_name, - arrayJoin(JSONExtractKeysAndValues(properties, 'String')) as prop - FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_1 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event IN ['positively_related', 'negatively_related'] ) - GROUP BY name, - prop - HAVING (success_count + failure_count) > 2 - AND prop.1 NOT IN [] - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count - FROM funnel_actors AS actors - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_event_properties_and_groups_materialized - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_1" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - WHERE team_id = 99999 - AND event IN ['paid', 
'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2020-01-14 23:59:59', 'UTC') AS date_to, - toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['paid', 'user signed up'] as funnel_step_names - SELECT concat(event_name, '::', prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) as success_count, - countDistinctIf(actor_id, steps <> target_step) as failure_count - FROM - (SELECT actors.actor_id as actor_id, - actors.steps as steps, - events.event as event_name, - arrayJoin(JSONExtractKeysAndValues(properties, 'String')) as prop - FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_1 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event IN ['positively_related', 'negatively_related'] ) - GROUP BY name, - prop - HAVING (success_count + failure_count) > 2 - AND prop.1 NOT IN [] - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count - FROM funnel_actors AS actors - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND 
toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2020-01-14 23:59:59', 'UTC') AS date_to, - toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['paid', 'user signed up'] as funnel_step_names - SELECT event.event AS name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count - FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event NOT IN [] - GROUP BY name - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count - FROM funnel_actors AS actors - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.1 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2020-01-14 23:59:59', 'UTC') AS date_to, - 
toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['user signed up', 'paid'] as funnel_step_names - SELECT actors.actor_id AS actor_id - FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event = 'positively_related' - AND actors.steps = target_step - GROUP BY actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.2 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2020-01-14 23:59:59', 'UTC') AS date_to, - toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['user signed up', 'paid'] as funnel_step_names - SELECT actors.actor_id AS actor_id - FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event = 'positively_related' - AND 
actors.steps <> target_step - GROUP BY actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.3 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2020-01-14 23:59:59', 'UTC') AS date_to, - toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['user signed up', 'paid'] as funnel_step_names - SELECT actors.actor_id AS actor_id - FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event = 'negatively_related' - AND actors.steps = target_step - GROUP BY actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.4 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY 
aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2020-01-14 23:59:59', 'UTC') AS date_to, - toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['user signed up', 'paid'] as funnel_step_names - SELECT actors.actor_id AS actor_id - FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event = 'negatively_related' - AND actors.steps <> target_step - GROUP BY actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.5 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - 
if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['finance'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2020-01-14 23:59:59', 'UTC') AS date_to, - toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['paid', 'user signed up'] as funnel_step_names - SELECT event.event AS name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count - FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event NOT IN [] - GROUP BY name - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count - FROM funnel_actors AS actors - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.6 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as 
step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['finance'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2020-01-14 23:59:59', 'UTC') AS date_to, - toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['user signed up', 'paid'] as funnel_step_names - SELECT actors.actor_id AS actor_id - FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event = 'negatively_related' - AND actors.steps = target_step - GROUP BY actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.7 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND 
toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['finance'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2020-01-14 23:59:59', 'UTC') AS date_to, - toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['user signed up', 'paid'] as funnel_step_names - SELECT actors.actor_id AS actor_id - FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event = 'negatively_related' - AND actors.steps <> target_step - GROUP BY actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(notEmpty(overrides.distinct_id), overrides.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND 
toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2020-01-14 23:59:59', 'UTC') AS date_to, - toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['paid', 'user signed up'] as funnel_step_names - SELECT event.event AS name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count - FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event NOT IN [] - GROUP BY name - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count - FROM funnel_actors AS actors - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.1 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(notEmpty(overrides.distinct_id), overrides.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING 
ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2020-01-14 23:59:59', 'UTC') AS date_to, - toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['user signed up', 'paid'] as funnel_step_names - SELECT actors.actor_id AS actor_id - FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event = 'positively_related' - AND actors.steps = target_step - GROUP BY actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.2 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(notEmpty(overrides.distinct_id), overrides.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, 
person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2020-01-14 23:59:59', 'UTC') AS date_to, - toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['user signed up', 'paid'] as funnel_step_names - SELECT actors.actor_id AS actor_id - FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event = 'positively_related' - AND actors.steps <> target_step - GROUP BY actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.3 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(notEmpty(overrides.distinct_id), overrides.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = 
overrides.distinct_id - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2020-01-14 23:59:59', 'UTC') AS date_to, - toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['user signed up', 'paid'] as funnel_step_names - SELECT actors.actor_id AS actor_id - FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event = 'negatively_related' - AND actors.steps = target_step - GROUP BY actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.4 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(notEmpty(overrides.distinct_id), overrides.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - 
AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2020-01-14 23:59:59', 'UTC') AS date_to, - toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['user signed up', 'paid'] as funnel_step_names - SELECT actors.actor_id AS actor_id - FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event = 'negatively_related' - AND actors.steps <> target_step - GROUP BY actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.5 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(notEmpty(overrides.distinct_id), overrides.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 
- GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['finance'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2020-01-14 23:59:59', 'UTC') AS date_to, - toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['paid', 'user signed up'] as funnel_step_names - SELECT event.event AS name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count - FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event NOT IN [] - GROUP BY name - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actors.actor_id, actors.steps = target_step) AS success_count, - countDistinctIf(actors.actor_id, actors.steps <> target_step) AS failure_count - FROM funnel_actors AS actors - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.6 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(notEmpty(overrides.distinct_id), overrides.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS 
person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['finance'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2020-01-14 23:59:59', 'UTC') AS date_to, - toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['user signed up', 'paid'] as funnel_step_names - SELECT actors.actor_id AS actor_id - FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event = 'negatively_related' - AND actors.steps = target_step - GROUP BY actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.7 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(notEmpty(overrides.distinct_id), overrides.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, 
timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['finance'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2020-01-14 23:59:59', 'UTC') AS date_to, - toDateTime('2020-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['user signed up', 'paid'] as funnel_step_names - SELECT actors.actor_id AS actor_id - FROM events AS event - JOIN funnel_actors AS actors ON actors.actor_id = events.$group_0 - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event = 'negatively_related' - AND actors.steps <> target_step - GROUP BY actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) 
latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM - (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(arrayZip(['industry'], [replaceRegexpAll(JSONExtractRaw(groups_0.group_properties_0, 'industry'), '^"|"$', '')])) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM funnel_actors - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups.1 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= 
toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['positive'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps = target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups.2 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['positive'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps <> target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups.3 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - 
median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['negative'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps = target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups.4 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - 
if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['negative'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps <> target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups.5 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM - (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(JSONExtractKeysAndValues(groups_0.group_properties_0, 'String')) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - 
FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM funnel_actors - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM - (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(arrayZip(['industry'], [replaceRegexpAll(JSONExtractRaw(groups_0.group_properties_0, 'industry'), '^"|"$', '')])) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM funnel_actors - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized.1 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS 
actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['positive'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps = target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized.2 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 
PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['positive'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps <> target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized.3 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['negative'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY 
aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps = target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized.4 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['negative'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps <> target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized.5 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 
- FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM - (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(JSONExtractKeysAndValues(groups_0.group_properties_0, 'String')) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM funnel_actors - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - e.person_id as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, 
timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM - (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(arrayZip(['industry'], [replaceRegexpAll(JSONExtractRaw(groups_0.group_properties_0, 'industry'), '^"|"$', '')])) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM funnel_actors - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events.1 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - e.person_id as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 
23:59:59', 'UTC') - AND (has(['positive'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps = target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events.2 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - e.person_id as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['positive'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps <> target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events.3 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) 
step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - e.person_id as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['negative'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps = target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events.4 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as 
aggregation_target, - e.person_id as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['negative'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps <> target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events.5 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - e.person_id as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM - (SELECT 
actor_id, - funnel_actors.steps as steps, - arrayJoin(JSONExtractKeysAndValues(groups_0.group_properties_0, 'String')) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM funnel_actors - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - e.person_id as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM - (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(arrayZip(['industry'], [replaceRegexpAll(JSONExtractRaw(groups_0.group_properties_0, 'industry'), '^"|"$', '')])) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] - UNION ALL - SELECT 'Total_Values_In_Query' as name, - 
countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM funnel_actors - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.1 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - e.person_id as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['positive'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps = target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.2 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND 
latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - e.person_id as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['positive'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps <> target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.3 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - e.person_id as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == 
groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['negative'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps = target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.4 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - e.person_id as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['negative'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps <> target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: 
TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.5 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - e.person_id as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM - (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(JSONExtractKeysAndValues(groups_0.group_properties_0, 'String')) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM funnel_actors - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, 
- max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(notEmpty(overrides.distinct_id), overrides.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM - (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(arrayZip(['industry'], [replaceRegexpAll(JSONExtractRaw(groups_0.group_properties_0, 'industry'), '^"|"$', '')])) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM funnel_actors - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2.1 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, 
- steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(notEmpty(overrides.distinct_id), overrides.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['positive'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps = target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2.2 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) 
step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(notEmpty(overrides.distinct_id), overrides.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['positive'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps <> target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2.3 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(notEmpty(overrides.distinct_id), overrides.person_id, e.person_id) as 
person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['negative'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps = target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2.4 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(notEmpty(overrides.distinct_id), overrides.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM 
person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['negative'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id - FROM funnel_actors - WHERE funnel_actors.steps <> target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2.5 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(notEmpty(overrides.distinct_id), overrides.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0)) AS overrides ON e.distinct_id = overrides.distinct_id - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND 
toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND notEmpty(e.person_id) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT concat(prop.1, '::', prop.2) as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM - (SELECT actor_id, - funnel_actors.steps as steps, - arrayJoin(JSONExtractKeysAndValues(groups_0.group_properties_0, 'String')) as prop - FROM funnel_actors - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON funnel_actors.actor_id == groups_0.group_key) aggregation_target_with_props - GROUP BY prop.1, - prop.2 - HAVING prop.1 NOT IN [] - UNION ALL - SELECT 'Total_Values_In_Query' as name, - countDistinctIf(actor_id, steps = target_step) AS success_count, - countDistinctIf(actor_id, steps <> target_step) AS failure_count - FROM funnel_actors - ''' -# --- diff --git a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr b/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr deleted file mode 100644 index 200f16b611..0000000000 --- a/ee/clickhouse/queries/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr +++ /dev/null @@ -1,785 +0,0 @@ -# serializer version: 1 -# name: TestClickhouseFunnelCorrelationsActors.test_funnel_correlation_on_event_with_recordings - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id, - final_matching_events as matching_events , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - groupArray(10)(step_0_matching_event) as step_0_matching_events, - groupArray(10)(step_1_matching_event) as step_1_matching_events, - groupArray(10)(final_matching_event) as final_matching_events , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time, - ("latest_0", - "uuid_0", - "$session_id_0", - "$window_id_0") as step_0_matching_event, - ("latest_1", - "uuid_1", - "$session_id_1", - "$window_id_1") as step_1_matching_event, - if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) as final_matching_event , - latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - ("latest_0", - "uuid_0", - "$session_id_0", - "$window_id_0") as step_0_matching_event, - ("latest_1", - "uuid_1", - "$session_id_1", - "$window_id_1") as step_1_matching_event, - if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), 
step_0_matching_event, step_1_matching_event)) as final_matching_event - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - "uuid_0", - "$session_id_0", - "$window_id_0", - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - last_value("uuid_1") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "uuid_1", - last_value("$session_id_1") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$session_id_1", - last_value("$window_id_1") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$window_id_1" - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - e.uuid AS uuid, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = '$pageview', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(step_0 = 1, "uuid", null) as "uuid_0", - if(step_0 = 1, "$session_id", null) as "$session_id_0", - if(step_0 = 1, "$window_id", null) as "$window_id_0", - if(event = 'insight analyzed', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(step_1 = 1, "uuid", null) as "uuid_1", - if(step_1 = 1, "$session_id", null) as "$session_id_1", - if(step_1 = 1, "$window_id", null) as "$window_id_1" - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['$pageview', 'insight analyzed'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageview', 'insight analyzed'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2021-01-08 23:59:59', 'UTC') AS date_to, - toDateTime('2021-01-01 00:00:00', 'UTC') AS date_from, - 2 AS target_step, - ['$pageview', 'insight analyzed'] as funnel_step_names - SELECT actors.actor_id AS actor_id , - any(actors.matching_events) AS matching_events - FROM events AS event - JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON pdi.distinct_id = events.distinct_id - JOIN funnel_actors AS actors ON pdi.person_id = actors.actor_id - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND 
event.event NOT IN funnel_step_names - AND event.event = 'insight loaded' - AND actors.steps = target_step - GROUP BY actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelationsActors.test_funnel_correlation_on_event_with_recordings.1 - ''' - - SELECT DISTINCT session_id - FROM session_replay_events - WHERE team_id = 99999 - and session_id in ['s2'] - AND min_first_timestamp >= '2020-12-31 00:00:00' - AND max_last_timestamp <= '2021-01-09 23:59:59' - ''' -# --- -# name: TestClickhouseFunnelCorrelationsActors.test_funnel_correlation_on_event_with_recordings.2 - ''' - WITH funnel_actors as - (SELECT aggregation_target AS actor_id, - final_matching_events as matching_events , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner, - groupArray(10)(step_0_matching_event) as step_0_matching_events, - groupArray(10)(step_1_matching_event) as step_1_matching_events, - groupArray(10)(step_2_matching_event) as step_2_matching_events, - groupArray(10)(final_matching_event) as final_matching_events , - argMax(latest_0, steps) as timestamp, - argMax(latest_2, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time, - step_2_conversion_time, - ("latest_0", - "uuid_0", - "$session_id_0", - "$window_id_0") as step_0_matching_event, - ("latest_1", - "uuid_1", - "$session_id_1", - "$window_id_1") as step_1_matching_event, - ("latest_2", - "uuid_2", - "$session_id_2", - "$window_id_2") as step_2_matching_event, - if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) as final_matching_event , - latest_0, - latest_2, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 14 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - ("latest_0", - "uuid_0", - "$session_id_0", - "$window_id_0") as step_0_matching_event, - ("latest_1", - "uuid_1", - "$session_id_1", - "$window_id_1") as step_1_matching_event, - ("latest_2", - "uuid_2", - "$session_id_2", - "$window_id_2") as step_2_matching_event, - if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) as final_matching_event - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - "uuid_0", - "$session_id_0", - "$window_id_0", - step_1, - latest_1, - "uuid_1", - "$session_id_1", - "$window_id_1", - step_2, - min(latest_2) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2, - 
last_value("uuid_2") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "uuid_2", - last_value("$session_id_2") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$session_id_2", - last_value("$window_id_2") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$window_id_2" - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - "uuid_0", - "$session_id_0", - "$window_id_0", - step_1, - latest_1, - "uuid_1", - "$session_id_1", - "$window_id_1", - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2, - if(latest_2 < latest_1, NULL, "uuid_2") as "uuid_2", - if(latest_2 < latest_1, NULL, "$session_id_2") as "$session_id_2", - if(latest_2 < latest_1, NULL, "$window_id_2") as "$window_id_2" - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - "uuid_0", - "$session_id_0", - "$window_id_0", - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - last_value("uuid_1") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "uuid_1", - last_value("$session_id_1") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$session_id_1", - last_value("$window_id_1") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$window_id_1", - step_2, - min(latest_2) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2, - last_value("uuid_2") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "uuid_2", - last_value("$session_id_2") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$session_id_2", - last_value("$window_id_2") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$window_id_2" - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - e.uuid AS uuid, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = '$pageview', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(step_0 = 1, "uuid", null) as "uuid_0", - if(step_0 = 1, "$session_id", null) as "$session_id_0", - if(step_0 = 1, "$window_id", null) as "$window_id_0", - if(event = 'insight analyzed', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(step_1 = 1, "uuid", null) as "uuid_1", - if(step_1 = 1, "$session_id", null) as "$session_id_1", - if(step_1 = 1, "$window_id", null) as "$window_id_1", - if(event = 'insight updated', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - if(step_2 = 1, "uuid", null) as "uuid_2", - if(step_2 = 1, "$session_id", null) as "$session_id_2", - if(step_2 = 1, "$window_id", null) as "$window_id_2" - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['$pageview', 'insight analyzed', 'insight updated'] - AND 
toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageview', 'insight analyzed', 'insight updated'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) )))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2, 3] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - toDateTime('2021-01-08 23:59:59', 'UTC') AS date_to, - toDateTime('2021-01-01 00:00:00', 'UTC') AS date_from, - 3 AS target_step, - ['$pageview', 'insight analyzed', 'insight updated'] as funnel_step_names - SELECT actors.actor_id AS actor_id , - any(actors.matching_events) AS matching_events - FROM events AS event - JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON pdi.distinct_id = events.distinct_id - JOIN funnel_actors AS actors ON pdi.person_id = actors.actor_id - WHERE toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to - AND event.team_id = 99999 - AND toTimeZone(toDateTime(event.timestamp), 'UTC') > actors.first_timestamp - AND toTimeZone(toDateTime(event.timestamp), 'UTC') < COALESCE(actors.final_timestamp, actors.first_timestamp + INTERVAL 14 DAY, date_to) - AND event.event NOT IN funnel_step_names - AND event.event = 'insight loaded' - AND actors.steps <> target_step - GROUP BY actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelationsActors.test_funnel_correlation_on_event_with_recordings.3 - ''' - - SELECT DISTINCT session_id - FROM session_replay_events - WHERE team_id = 99999 - and session_id in ['s2'] - AND min_first_timestamp >= '2020-12-31 00:00:00' - AND max_last_timestamp <= '2021-01-09 23:59:59' - ''' -# --- -# name: TestClickhouseFunnelCorrelationsActors.test_funnel_correlation_on_properties_with_recordings - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id, - final_matching_events as matching_events , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - groupArray(10)(step_0_matching_event) as step_0_matching_events, - groupArray(10)(step_1_matching_event) as step_1_matching_events, - groupArray(10)(final_matching_event) as final_matching_events , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time, - ("latest_0", - "uuid_0", - "$session_id_0", - "$window_id_0") as step_0_matching_event, - ("latest_1", - "uuid_1", - "$session_id_1", - "$window_id_1") as step_1_matching_event, - if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) as final_matching_event , 
- latest_0, - latest_1, - latest_0 - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - ("latest_0", - "uuid_0", - "$session_id_0", - "$window_id_0") as step_0_matching_event, - ("latest_1", - "uuid_1", - "$session_id_1", - "$window_id_1") as step_1_matching_event, - if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) as final_matching_event - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - "uuid_0", - "$session_id_0", - "$window_id_0", - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - last_value("uuid_1") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "uuid_1", - last_value("$session_id_1") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$session_id_1", - last_value("$window_id_1") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) "$window_id_1" - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - e.uuid AS uuid, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = '$pageview', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(step_0 = 1, "uuid", null) as "uuid_0", - if(step_0 = 1, "$session_id", null) as "$session_id_0", - if(step_0 = 1, "$window_id", null) as "$window_id_0", - if(event = 'insight analyzed', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(step_1 = 1, "uuid", null) as "uuid_1", - if(step_1 = 1, "$session_id", null) as "$session_id_1", - if(step_1 = 1, "$window_id", null) as "$window_id_1" - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['$pageview', 'insight analyzed'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (has(['bar'], replaceRegexpAll(JSONExtractRaw(properties, 'foo'), '^"|"$', ''))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (has(['bar'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'foo'), '^"|"$', ''))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event IN ['$pageview', 'insight analyzed'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - 
max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id , - any(funnel_actors.matching_events) AS matching_events - FROM funnel_actors - WHERE funnel_actors.steps = target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelationsActors.test_funnel_correlation_on_properties_with_recordings.1 - ''' - - SELECT DISTINCT session_id - FROM session_replay_events - WHERE team_id = 99999 - and session_id in ['s2'] - AND min_first_timestamp >= '2020-12-31 00:00:00' - AND max_last_timestamp <= '2021-01-09 23:59:59' - ''' -# --- -# name: TestClickhouseFunnelCorrelationsActors.test_strict_funnel_correlation_with_recordings - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id, - final_matching_events as matching_events , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp, - groupArray(10)(step_0_matching_event) as step_0_matching_events, - groupArray(10)(step_1_matching_event) as step_1_matching_events, - groupArray(10)(final_matching_event) as final_matching_events - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0, - ("latest_0", - "uuid_0", - "$session_id_0", - "$window_id_0") as step_0_matching_event, - ("latest_1", - "uuid_1", - "$session_id_1", - "$window_id_1") as step_1_matching_event, - if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) as final_matching_event - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - ("latest_0", - "uuid_0", - "$session_id_0", - "$window_id_0") as step_0_matching_event, - ("latest_1", - "uuid_1", - "$session_id_1", - "$window_id_1") as step_1_matching_event, - if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) as final_matching_event - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - "uuid_0", - "$session_id_0", - "$window_id_0", - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) latest_1, - min("uuid_1") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) "uuid_1", - min("$session_id_1") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) "$session_id_1", - min("$window_id_1") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) "$window_id_1" - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - e.uuid AS uuid, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = '$pageview', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(step_0 = 1, "uuid", null) as "uuid_0", - 
if(step_0 = 1, "$session_id", null) as "$session_id_0", - if(step_0 = 1, "$window_id", null) as "$window_id_0", - if(event = 'insight analyzed', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(step_1 = 1, "uuid", null) as "uuid_1", - if(step_1 = 1, "$session_id", null) as "$session_id_1", - if(step_1 = 1, "$window_id", null) as "$window_id_1" - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (has(['bar'], replaceRegexpAll(JSONExtractRaw(properties, 'foo'), '^"|"$', ''))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (has(['bar'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'foo'), '^"|"$', ''))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') - AND (1=1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id , - any(funnel_actors.matching_events) AS matching_events - FROM funnel_actors - WHERE funnel_actors.steps = target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelationsActors.test_strict_funnel_correlation_with_recordings.1 - ''' - - SELECT DISTINCT session_id - FROM session_replay_events - WHERE team_id = 99999 - and session_id in ['s2'] - AND min_first_timestamp >= '2020-12-31 00:00:00' - AND max_last_timestamp <= '2021-01-09 23:59:59' - ''' -# --- -# name: TestClickhouseFunnelCorrelationsActors.test_strict_funnel_correlation_with_recordings.2 - ''' - WITH funnel_actors AS - (SELECT aggregation_target AS actor_id, - final_matching_events as matching_events , timestamp, steps, - final_timestamp, - first_timestamp - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - argMax(latest_0, steps) as timestamp, - argMax(latest_1, steps) as final_timestamp, - argMax(latest_0, steps) as first_timestamp, - groupArray(10)(step_0_matching_event) as step_0_matching_events, - groupArray(10)(step_1_matching_event) as step_1_matching_events, - groupArray(10)(final_matching_event) as final_matching_events - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time , - latest_0, - latest_1, - latest_0, - ("latest_0", - "uuid_0", - "$session_id_0", - "$window_id_0") as step_0_matching_event, - ("latest_1", - "uuid_1", - "$session_id_1", - "$window_id_1") as step_1_matching_event, - if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), 
step_0_matching_event, step_1_matching_event)) as final_matching_event - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps, - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - ("latest_0", - "uuid_0", - "$session_id_0", - "$window_id_0") as step_0_matching_event, - ("latest_1", - "uuid_1", - "$session_id_1", - "$window_id_1") as step_1_matching_event, - if(isNull(latest_0),(null, null, null, null),if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) as final_matching_event - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - "uuid_0", - "$session_id_0", - "$window_id_0", - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) latest_1, - min("uuid_1") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) "uuid_1", - min("$session_id_1") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) "$session_id_1", - min("$window_id_1") over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) "$window_id_1" - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - e.uuid AS uuid, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = '$pageview', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(step_0 = 1, "uuid", null) as "uuid_0", - if(step_0 = 1, "$session_id", null) as "$session_id_0", - if(step_0 = 1, "$window_id", null) as "$window_id_0", - if(event = 'insight analyzed', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(step_1 = 1, "uuid", null) as "uuid_1", - if(step_1 = 1, "$session_id", null) as "$session_id_1", - if(step_1 = 1, "$window_id", null) as "$window_id_1" - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (has(['bar'], replaceRegexpAll(JSONExtractRaw(properties, 'foo'), '^"|"$', ''))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (has(['bar'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'foo'), '^"|"$', ''))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-08 23:59:59', 'UTC') - AND (1=1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000), - 2 AS target_step - SELECT funnel_actors.actor_id AS actor_id , - any(funnel_actors.matching_events) AS matching_events - 
FROM funnel_actors - WHERE funnel_actors.steps <> target_step - GROUP BY funnel_actors.actor_id - ORDER BY actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseFunnelCorrelationsActors.test_strict_funnel_correlation_with_recordings.3 - ''' - - SELECT DISTINCT session_id - FROM session_replay_events - WHERE team_id = 99999 - and session_id in ['s3'] - AND min_first_timestamp >= '2020-12-31 00:00:00' - AND max_last_timestamp <= '2021-01-09 23:59:59' - ''' -# --- diff --git a/ee/clickhouse/queries/funnels/test/breakdown_cases.py b/ee/clickhouse/queries/funnels/test/breakdown_cases.py deleted file mode 100644 index ce73579b84..0000000000 --- a/ee/clickhouse/queries/funnels/test/breakdown_cases.py +++ /dev/null @@ -1,420 +0,0 @@ -from datetime import datetime -from typing import Any - -from posthog.constants import INSIGHT_FUNNELS -from posthog.models.filters import Filter -from posthog.models.group.util import create_group -from posthog.models.group_type_mapping import GroupTypeMapping -from posthog.models.instance_setting import override_instance_config -from posthog.queries.funnels.funnel_unordered import ClickhouseFunnelUnordered -from posthog.queries.funnels.test.breakdown_cases import ( - FunnelStepResult, - assert_funnel_results_equal, -) -from posthog.test.base import ( - APIBaseTest, - snapshot_clickhouse_queries, - also_test_with_person_on_events_v2, -) -from posthog.test.test_journeys import journeys_for - - -def funnel_breakdown_group_test_factory(Funnel, FunnelPerson, _create_event, _create_action, _create_person): - class TestFunnelBreakdownGroup(APIBaseTest): - def _get_actor_ids_at_step(self, filter, funnel_step, breakdown_value=None): - person_filter = filter.shallow_clone({"funnel_step": funnel_step, "funnel_step_breakdown": breakdown_value}) - _, serialized_result, _ = FunnelPerson(person_filter, self.team).get_actors() - - return [val["id"] for val in serialized_result] - - def _create_groups(self): - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="organization", group_type_index=0 - ) - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="company", group_type_index=1 - ) - - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:5", - properties={"industry": "finance"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:6", - properties={"industry": "technology"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key="org:5", - properties={"industry": "random"}, - ) - - def _assert_funnel_breakdown_result_is_correct(self, result, steps: list[FunnelStepResult]): - def funnel_result(step: FunnelStepResult, order: int) -> dict[str, Any]: - return { - "action_id": step.name if step.type == "events" else step.action_id, - "name": step.name, - "custom_name": None, - "order": order, - "people": [], - "count": step.count, - "type": step.type, - "average_conversion_time": step.average_conversion_time, - "median_conversion_time": step.median_conversion_time, - "breakdown": step.breakdown, - "breakdown_value": step.breakdown, - **( - { - "action_id": None, - "name": f"Completed {order+1} step{'s' if order > 0 else ''}", - } - if Funnel == ClickhouseFunnelUnordered - else {} - ), - } - - step_results = [] - for index, step_result in enumerate(steps): - step_results.append(funnel_result(step_result, index)) - - assert_funnel_results_equal(result, step_results) - - @snapshot_clickhouse_queries - def 
test_funnel_breakdown_group(self): - self._create_groups() - - people = journeys_for( - { - "person1": [ - { - "event": "sign up", - "timestamp": datetime(2020, 1, 1, 12), - "properties": {"$group_0": "org:5", "$browser": "Chrome"}, - }, - { - "event": "play movie", - "timestamp": datetime(2020, 1, 1, 13), - "properties": {"$group_0": "org:5", "$browser": "Chrome"}, - }, - { - "event": "buy", - "timestamp": datetime(2020, 1, 1, 15), - "properties": {"$group_0": "org:5", "$browser": "Chrome"}, - }, - ], - "person2": [ - { - "event": "sign up", - "timestamp": datetime(2020, 1, 2, 14), - "properties": {"$group_0": "org:6", "$browser": "Safari"}, - }, - { - "event": "play movie", - "timestamp": datetime(2020, 1, 2, 16), - "properties": {"$group_0": "org:6", "$browser": "Safari"}, - }, - ], - "person3": [ - { - "event": "sign up", - "timestamp": datetime(2020, 1, 2, 14), - "properties": {"$group_0": "org:6", "$browser": "Safari"}, - } - ], - }, - self.team, - ) - - filters = { - "events": [ - {"id": "sign up", "order": 0}, - {"id": "play movie", "order": 1}, - {"id": "buy", "order": 2}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-08", - "funnel_window_days": 7, - "breakdown": "industry", - "breakdown_type": "group", - "breakdown_group_type_index": 0, - } - - filter = Filter(data=filters, team=self.team) - result = Funnel(filter, self.team).run() - - self._assert_funnel_breakdown_result_is_correct( - result[0], - [ - FunnelStepResult(name="sign up", breakdown="finance", count=1), - FunnelStepResult( - name="play movie", - breakdown="finance", - count=1, - average_conversion_time=3600.0, - median_conversion_time=3600.0, - ), - FunnelStepResult( - name="buy", - breakdown="finance", - count=1, - average_conversion_time=7200.0, - median_conversion_time=7200.0, - ), - ], - ) - - # Querying persons when aggregating by persons should be ok, despite group breakdown - self.assertCountEqual( - self._get_actor_ids_at_step(filter, 1, "finance"), - [people["person1"].uuid], - ) - self.assertCountEqual( - self._get_actor_ids_at_step(filter, 2, "finance"), - [people["person1"].uuid], - ) - - self._assert_funnel_breakdown_result_is_correct( - result[1], - [ - FunnelStepResult(name="sign up", breakdown="technology", count=2), - FunnelStepResult( - name="play movie", - breakdown="technology", - count=1, - average_conversion_time=7200.0, - median_conversion_time=7200.0, - ), - FunnelStepResult(name="buy", breakdown="technology", count=0), - ], - ) - - self.assertCountEqual( - self._get_actor_ids_at_step(filter, 1, "technology"), - [people["person2"].uuid, people["person3"].uuid], - ) - self.assertCountEqual( - self._get_actor_ids_at_step(filter, 2, "technology"), - [people["person2"].uuid], - ) - - # TODO: Delete this test when moved to person-on-events - @also_test_with_person_on_events_v2 - def test_funnel_aggregate_by_groups_breakdown_group(self): - self._create_groups() - - journeys_for( - { - "person1": [ - { - "event": "sign up", - "timestamp": datetime(2020, 1, 1, 12), - "properties": {"$group_0": "org:5", "$browser": "Chrome"}, - }, - { - "event": "play movie", - "timestamp": datetime(2020, 1, 1, 13), - "properties": {"$group_0": "org:5", "$browser": "Chrome"}, - }, - { - "event": "buy", - "timestamp": datetime(2020, 1, 1, 15), - "properties": {"$group_0": "org:5", "$browser": "Chrome"}, - }, - ], - "person2": [ - { - "event": "sign up", - "timestamp": datetime(2020, 1, 2, 14), - "properties": {"$group_0": "org:6", "$browser": "Safari"}, - }, - { - "event": "play 
movie", - "timestamp": datetime(2020, 1, 2, 16), - "properties": {"$group_0": "org:6", "$browser": "Safari"}, - }, - ], - "person3": [ - { - "event": "buy", - "timestamp": datetime(2020, 1, 2, 18), - "properties": {"$group_0": "org:6", "$browser": "Safari"}, - } - ], - }, - self.team, - ) - - filters = { - "events": [ - {"id": "sign up", "order": 0}, - {"id": "play movie", "order": 1}, - {"id": "buy", "order": 2}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-08", - "funnel_window_days": 7, - "breakdown": "industry", - "breakdown_type": "group", - "breakdown_group_type_index": 0, - "aggregation_group_type_index": 0, - } - - result = Funnel(Filter(data=filters, team=self.team), self.team).run() - - self._assert_funnel_breakdown_result_is_correct( - result[0], - [ - FunnelStepResult(name="sign up", breakdown="finance", count=1), - FunnelStepResult( - name="play movie", - breakdown="finance", - count=1, - average_conversion_time=3600.0, - median_conversion_time=3600.0, - ), - FunnelStepResult( - name="buy", - breakdown="finance", - count=1, - average_conversion_time=7200.0, - median_conversion_time=7200.0, - ), - ], - ) - - self._assert_funnel_breakdown_result_is_correct( - result[1], - [ - FunnelStepResult(name="sign up", breakdown="technology", count=1), - FunnelStepResult( - name="play movie", - breakdown="technology", - count=1, - average_conversion_time=7200.0, - median_conversion_time=7200.0, - ), - FunnelStepResult( - name="buy", - breakdown="technology", - count=1, - average_conversion_time=7200.0, - median_conversion_time=7200.0, - ), - ], - ) - - @also_test_with_person_on_events_v2 - @snapshot_clickhouse_queries - def test_funnel_aggregate_by_groups_breakdown_group_person_on_events(self): - self._create_groups() - - journeys_for( - { - "person1": [ - { - "event": "sign up", - "timestamp": datetime(2020, 1, 1, 12), - "properties": {"$group_0": "org:5", "$browser": "Chrome"}, - }, - { - "event": "play movie", - "timestamp": datetime(2020, 1, 1, 13), - "properties": {"$group_0": "org:5", "$browser": "Chrome"}, - }, - { - "event": "buy", - "timestamp": datetime(2020, 1, 1, 15), - "properties": {"$group_0": "org:5", "$browser": "Chrome"}, - }, - ], - "person2": [ - { - "event": "sign up", - "timestamp": datetime(2020, 1, 2, 14), - "properties": {"$group_0": "org:6", "$browser": "Safari"}, - }, - { - "event": "play movie", - "timestamp": datetime(2020, 1, 2, 16), - "properties": {"$group_0": "org:6", "$browser": "Safari"}, - }, - ], - "person3": [ - { - "event": "buy", - "timestamp": datetime(2020, 1, 2, 18), - "properties": {"$group_0": "org:6", "$browser": "Safari"}, - } - ], - }, - self.team, - ) - - filters = { - "events": [ - {"id": "sign up", "order": 0}, - {"id": "play movie", "order": 1}, - {"id": "buy", "order": 2}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-08", - "funnel_window_days": 7, - "breakdown": "industry", - "breakdown_type": "group", - "breakdown_group_type_index": 0, - "aggregation_group_type_index": 0, - } - with override_instance_config("PERSON_ON_EVENTS_ENABLED", True): - result = Funnel(Filter(data=filters, team=self.team), self.team).run() - - self._assert_funnel_breakdown_result_is_correct( - result[0], - [ - FunnelStepResult(name="sign up", breakdown="finance", count=1), - FunnelStepResult( - name="play movie", - breakdown="finance", - count=1, - average_conversion_time=3600.0, - median_conversion_time=3600.0, - ), - FunnelStepResult( - name="buy", - breakdown="finance", - 
count=1, - average_conversion_time=7200.0, - median_conversion_time=7200.0, - ), - ], - ) - - self._assert_funnel_breakdown_result_is_correct( - result[1], - [ - FunnelStepResult(name="sign up", breakdown="technology", count=1), - FunnelStepResult( - name="play movie", - breakdown="technology", - count=1, - average_conversion_time=7200.0, - median_conversion_time=7200.0, - ), - FunnelStepResult( - name="buy", - breakdown="technology", - count=1, - average_conversion_time=7200.0, - median_conversion_time=7200.0, - ), - ], - ) - - return TestFunnelBreakdownGroup diff --git a/ee/clickhouse/queries/funnels/test/test_funnel.py b/ee/clickhouse/queries/funnels/test/test_funnel.py deleted file mode 100644 index a48642198f..0000000000 --- a/ee/clickhouse/queries/funnels/test/test_funnel.py +++ /dev/null @@ -1,209 +0,0 @@ -from datetime import datetime - -from ee.clickhouse.queries.funnels.test.breakdown_cases import ( - funnel_breakdown_group_test_factory, -) -from posthog.constants import INSIGHT_FUNNELS -from posthog.models.action import Action -from posthog.models.cohort import Cohort -from posthog.models.filters import Filter -from posthog.models.group.util import create_group -from posthog.models.group_type_mapping import GroupTypeMapping -from posthog.queries.funnels.funnel import ClickhouseFunnel -from posthog.queries.funnels.funnel_persons import ClickhouseFunnelActors -from posthog.queries.funnels.funnel_strict_persons import ClickhouseFunnelStrictActors -from posthog.queries.funnels.funnel_unordered_persons import ( - ClickhouseFunnelUnorderedActors, -) -from posthog.queries.funnels.test.test_funnel import _create_action -from posthog.test.base import ( - APIBaseTest, - ClickhouseTestMixin, - _create_event, - _create_person, -) -from posthog.test.test_journeys import journeys_for - - -class TestFunnelGroupBreakdown( - ClickhouseTestMixin, - funnel_breakdown_group_test_factory( - ClickhouseFunnel, - ClickhouseFunnelActors, - _create_event, - _create_action, - _create_person, - ), -): # type: ignore - pass - - -class TestUnorderedFunnelGroupBreakdown( - ClickhouseTestMixin, - funnel_breakdown_group_test_factory( - ClickhouseFunnel, - ClickhouseFunnelUnorderedActors, - _create_event, - _create_action, - _create_person, - ), -): # type: ignore - pass - - -class TestStrictFunnelGroupBreakdown( - ClickhouseTestMixin, - funnel_breakdown_group_test_factory( - ClickhouseFunnel, - ClickhouseFunnelStrictActors, - _create_event, - _create_action, - _create_person, - ), -): # type: ignore - pass - - -class TestClickhouseFunnel(ClickhouseTestMixin, APIBaseTest): - maxDiff = None - - def test_funnel_aggregation_with_groups_with_cohort_filtering(self): - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="organization", group_type_index=0 - ) - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="company", group_type_index=1 - ) - - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:5", - properties={"industry": "finance"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:6", - properties={"industry": "technology"}, - ) - - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key="company:1", - properties={}, - ) - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key="company:2", - properties={}, - ) - - _create_person( - distinct_ids=[f"user_1"], - team=self.team, - properties={"email": "fake@test.com"}, - ) - _create_person( 
- distinct_ids=[f"user_2"], - team=self.team, - properties={"email": "fake@test.com"}, - ) - _create_person( - distinct_ids=[f"user_3"], - team=self.team, - properties={"email": "fake_2@test.com"}, - ) - - Action.objects.create(team=self.team, name="action1", steps_json=[{"event": "$pageview"}]) - - cohort = Cohort.objects.create( - team=self.team, - groups=[ - { - "properties": [ - { - "key": "email", - "operator": "icontains", - "value": "fake@test.com", - "type": "person", - } - ] - } - ], - ) - - events_by_person = { - "user_1": [ - { - "event": "$pageview", - "timestamp": datetime(2020, 1, 2, 14), - "properties": {"$group_0": "org:5"}, - }, - { - "event": "user signed up", - "timestamp": datetime(2020, 1, 2, 14), - "properties": {"$group_0": "org:5"}, - }, - { - "event": "user signed up", # same person, different group, so should count as different step 1 in funnel - "timestamp": datetime(2020, 1, 10, 14), - "properties": {"$group_0": "org:6"}, - }, - ], - "user_2": [ - { # different person, same group, so should count as step two in funnel - "event": "paid", - "timestamp": datetime(2020, 1, 3, 14), - "properties": {"$group_0": "org:5"}, - } - ], - "user_3": [ - { - "event": "user signed up", - "timestamp": datetime(2020, 1, 2, 14), - "properties": {"$group_0": "org:7"}, - }, - { # person not in cohort so should be filtered out - "event": "paid", - "timestamp": datetime(2020, 1, 3, 14), - "properties": {"$group_0": "org:7"}, - }, - ], - } - journeys_for(events_by_person, self.team) - cohort.calculate_people_ch(pending_version=0) - - filters = { - "events": [ - { - "id": "user signed up", - "type": "events", - "order": 0, - "properties": [ - { - "type": "precalculated-cohort", - "key": "id", - "value": cohort.pk, - } - ], - }, - {"id": "paid", "type": "events", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "aggregation_group_type_index": 0, - } - - filter = Filter(data=filters) - funnel = ClickhouseFunnel(filter, self.team) - result = funnel.run() - - self.assertEqual(result[0]["name"], "user signed up") - self.assertEqual(result[0]["count"], 2) - - self.assertEqual(result[1]["name"], "paid") - self.assertEqual(result[1]["count"], 1) diff --git a/ee/clickhouse/queries/funnels/test/test_funnel_correlation.py b/ee/clickhouse/queries/funnels/test/test_funnel_correlation.py deleted file mode 100644 index bb470c2752..0000000000 --- a/ee/clickhouse/queries/funnels/test/test_funnel_correlation.py +++ /dev/null @@ -1,2088 +0,0 @@ -import unittest - -from rest_framework.exceptions import ValidationError - -from ee.clickhouse.queries.funnels.funnel_correlation import ( - EventContingencyTable, - EventStats, - FunnelCorrelation, -) -from ee.clickhouse.queries.funnels.funnel_correlation_persons import ( - FunnelCorrelationActors, -) -from posthog.constants import INSIGHT_FUNNELS -from posthog.models.action import Action -from posthog.models.element import Element -from posthog.models.filters import Filter -from posthog.models.group.util import create_group -from posthog.models.group_type_mapping import GroupTypeMapping -from posthog.models.instance_setting import override_instance_config -from posthog.test.base import ( - APIBaseTest, - ClickhouseTestMixin, - _create_event, - _create_person, - also_test_with_materialized_columns, - flush_persons_and_events, - snapshot_clickhouse_queries, - also_test_with_person_on_events_v2, -) -from posthog.test.test_journeys import journeys_for - - -def _create_action(**kwargs): - team = 
kwargs.pop("team") - name = kwargs.pop("name") - properties = kwargs.pop("properties", {}) - action = Action.objects.create(team=team, name=name, steps_json=[{"event": name, "properties": properties}]) - return action - - -class TestClickhouseFunnelCorrelation(ClickhouseTestMixin, APIBaseTest): - maxDiff = None - - def _get_actors_for_event(self, filter: Filter, event_name: str, properties=None, success=True): - actor_filter = filter.shallow_clone( - { - "funnel_correlation_person_entity": { - "id": event_name, - "type": "events", - "properties": properties, - }, - "funnel_correlation_person_converted": "TrUe" if success else "falSE", - } - ) - - _, serialized_actors, _ = FunnelCorrelationActors(actor_filter, self.team).get_actors() - return [str(row["id"]) for row in serialized_actors] - - def _get_actors_for_property(self, filter: Filter, property_values: list, success=True): - actor_filter = filter.shallow_clone( - { - "funnel_correlation_property_values": [ - { - "key": prop, - "value": value, - "type": type, - "group_type_index": group_type_index, - } - for prop, value, type, group_type_index in property_values - ], - "funnel_correlation_person_converted": "TrUe" if success else "falSE", - } - ) - _, serialized_actors, _ = FunnelCorrelationActors(actor_filter, self.team).get_actors() - return [str(row["id"]) for row in serialized_actors] - - def test_basic_funnel_correlation_with_events(self): - filters = { - "events": [ - {"id": "user signed up", "type": "events", "order": 0}, - {"id": "paid", "type": "events", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "funnel_correlation_type": "events", - } - - filter = Filter(data=filters) - correlation = FunnelCorrelation(filter, self.team) - - for i in range(10): - _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - ) - if i % 2 == 0: - _create_event( - team=self.team, - event="positively_related", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - ) - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_{i}", - timestamp="2020-01-04T14:00:00Z", - ) - - for i in range(10, 20): - _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - ) - if i % 2 == 0: - _create_event( - team=self.team, - event="negatively_related", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - ) - - result = correlation._run()[0] - - odds_ratios = [item.pop("odds_ratio") for item in result] # type: ignore - expected_odds_ratios = [11, 1 / 11] - - for odds, expected_odds in zip(odds_ratios, expected_odds_ratios): - self.assertAlmostEqual(odds, expected_odds) - - self.assertEqual( - result, - [ - { - "event": "positively_related", - "success_count": 5, - "failure_count": 0, - # "odds_ratio": 11.0, - "correlation_type": "success", - }, - { - "event": "negatively_related", - "success_count": 0, - "failure_count": 5, - # "odds_ratio": 1 / 11, - "correlation_type": "failure", - }, - ], - ) - - self.assertEqual(len(self._get_actors_for_event(filter, "positively_related")), 5) - self.assertEqual( - len(self._get_actors_for_event(filter, "positively_related", success=False)), - 0, - ) - self.assertEqual( - len(self._get_actors_for_event(filter, "negatively_related", success=False)), - 5, - ) - 
self.assertEqual(len(self._get_actors_for_event(filter, "negatively_related")), 0) - - # Now exclude positively_related - filter = filter.shallow_clone({"funnel_correlation_exclude_event_names": ["positively_related"]}) - correlation = FunnelCorrelation(filter, self.team) - - result = correlation._run()[0] - - odds_ratio = result[0].pop("odds_ratio") # type: ignore - expected_odds_ratio = 1 / 11 - - self.assertAlmostEqual(odds_ratio, expected_odds_ratio) - - self.assertEqual( - result, - [ - { - "event": "negatively_related", - "success_count": 0, - "failure_count": 5, - # "odds_ratio": 1 / 11, - "correlation_type": "failure", - } - ], - ) - # Getting specific people isn't affected by exclude_events - self.assertEqual(len(self._get_actors_for_event(filter, "positively_related")), 5) - self.assertEqual( - len(self._get_actors_for_event(filter, "positively_related", success=False)), - 0, - ) - self.assertEqual( - len(self._get_actors_for_event(filter, "negatively_related", success=False)), - 5, - ) - self.assertEqual(len(self._get_actors_for_event(filter, "negatively_related")), 0) - - @snapshot_clickhouse_queries - def test_action_events_are_excluded_from_correlations(self): - journey = {} - - for i in range(3): - person_id = f"user_{i}" - events = [ - { - "event": "user signed up", - "timestamp": "2020-01-02T14:00:00", - "properties": {"key": "val"}, - }, - # same event, but missing property, so not part of action. - {"event": "user signed up", "timestamp": "2020-01-02T14:10:00"}, - ] - if i % 2 == 0: - events.append({"event": "positively_related", "timestamp": "2020-01-03T14:00:00"}) - events.append( - { - "event": "paid", - "timestamp": "2020-01-04T14:00:00", - "properties": {"key": "val"}, - } - ) - - journey[person_id] = events - - # one failure needed - journey["failure"] = [ - { - "event": "user signed up", - "timestamp": "2020-01-02T14:00:00", - "properties": {"key": "val"}, - } - ] - - journeys_for(events_by_person=journey, team=self.team) # type: ignore - - sign_up_action = _create_action( - name="user signed up", - team=self.team, - properties=[{"key": "key", "type": "event", "value": ["val"], "operator": "exact"}], - ) - - paid_action = _create_action( - name="paid", - team=self.team, - properties=[{"key": "key", "type": "event", "value": ["val"], "operator": "exact"}], - ) - filters = { - "events": [], - "actions": [ - {"id": sign_up_action.id, "order": 0}, - {"id": paid_action.id, "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "funnel_correlation_type": "events", - } - - filter = Filter(data=filters) - correlation = FunnelCorrelation(filter, self.team) - result = correlation._run()[0] - - # Β missing user signed up and paid from result set, as expected - self.assertEqual( - result, - [ - { - "event": "positively_related", - "success_count": 2, - "failure_count": 0, - "odds_ratio": 3, - "correlation_type": "success", - } - ], - ) - - @also_test_with_person_on_events_v2 - @snapshot_clickhouse_queries - def test_funnel_correlation_with_events_and_groups(self): - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="organization", group_type_index=0 - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:5", - properties={"industry": "finance"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:7", - properties={"industry": "finance"}, - ) - - for i in range(10, 20): - create_group( - team_id=self.team.pk, - 
group_type_index=0, - group_key=f"org:{i}", - properties={}, - ) - _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - properties={"$group_0": f"org:{i}"}, - ) - if i % 2 == 0: - _create_event( - team=self.team, - event="positively_related", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - properties={"$group_0": f"org:{i}"}, - ) - # this event shouldn't show up when dealing with groups - _create_event( - team=self.team, - event="positively_related_without_group", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - ) - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_{i}", - timestamp="2020-01-04T14:00:00Z", - properties={"$group_0": f"org:{i}"}, - ) - - # one fail group - _create_person(distinct_ids=[f"user_fail"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_fail", - timestamp="2020-01-02T14:00:00Z", - properties={"$group_0": f"org:5"}, - ) - _create_event( - team=self.team, - event="negatively_related", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - properties={"$group_0": f"org:5"}, - ) - - # one success group with same filter property - _create_person(distinct_ids=[f"user_succ"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_succ", - timestamp="2020-01-02T14:00:00Z", - properties={"$group_0": f"org:7"}, - ) - _create_event( - team=self.team, - event="negatively_related", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - properties={"$group_0": f"org:7"}, - ) - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_succ", - timestamp="2020-01-04T14:00:00Z", - properties={"$group_0": f"org:7"}, - ) - - filters = { - "events": [ - {"id": "user signed up", "type": "events", "order": 0}, - {"id": "paid", "type": "events", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "funnel_correlation_type": "events", - "aggregation_group_type_index": 0, - } - - filter = Filter(data=filters) - result = FunnelCorrelation(filter, self.team)._run()[0] - - odds_ratios = [item.pop("odds_ratio") for item in result] # type: ignore - expected_odds_ratios = [12 / 7, 1 / 11] - - for odds, expected_odds in zip(odds_ratios, expected_odds_ratios): - self.assertAlmostEqual(odds, expected_odds) - - self.assertEqual( - result, - [ - { - "event": "positively_related", - "success_count": 5, - "failure_count": 0, - # "odds_ratio": 12/7, - "correlation_type": "success", - }, - { - "event": "negatively_related", - "success_count": 1, - "failure_count": 1, - # "odds_ratio": 1 / 11, - "correlation_type": "failure", - }, - ], - ) - - self.assertEqual(len(self._get_actors_for_event(filter, "positively_related")), 5) - self.assertEqual( - len(self._get_actors_for_event(filter, "positively_related", success=False)), - 0, - ) - self.assertEqual(len(self._get_actors_for_event(filter, "negatively_related")), 1) - self.assertEqual( - len(self._get_actors_for_event(filter, "negatively_related", success=False)), - 1, - ) - - # Now exclude all groups in positive - filter = filter.shallow_clone( - { - "properties": [ - { - "key": "industry", - "value": "finance", - "type": "group", - "group_type_index": 0, - } - ] - } - ) - result = FunnelCorrelation(filter, self.team)._run()[0] - - odds_ratio = result[0].pop("odds_ratio") # type: 
ignore - expected_odds_ratio = 1 - # success total and failure totals remove other groups too - - self.assertAlmostEqual(odds_ratio, expected_odds_ratio) - - self.assertEqual( - result, - [ - { - "event": "negatively_related", - "success_count": 1, - "failure_count": 1, - # "odds_ratio": 1, - "correlation_type": "failure", - } - ], - ) - - self.assertEqual(len(self._get_actors_for_event(filter, "negatively_related")), 1) - self.assertEqual( - len(self._get_actors_for_event(filter, "negatively_related", success=False)), - 1, - ) - - @also_test_with_materialized_columns(event_properties=[], person_properties=["$browser"]) - @snapshot_clickhouse_queries - def test_basic_funnel_correlation_with_properties(self): - filters = { - "events": [ - {"id": "user signed up", "type": "events", "order": 0}, - {"id": "paid", "type": "events", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "funnel_correlation_type": "properties", - "funnel_correlation_names": ["$browser"], - } - - filter = Filter(data=filters) - correlation = FunnelCorrelation(filter, self.team) - - for i in range(10): - _create_person( - distinct_ids=[f"user_{i}"], - team_id=self.team.pk, - properties={"$browser": "Positive"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - ) - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_{i}", - timestamp="2020-01-04T14:00:00Z", - ) - - for i in range(10, 20): - _create_person( - distinct_ids=[f"user_{i}"], - team_id=self.team.pk, - properties={"$browser": "Negative"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - ) - if i % 2 == 0: - _create_event( - team=self.team, - event="negatively_related", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - ) - - # One Positive with failure - _create_person( - distinct_ids=[f"user_fail"], - team_id=self.team.pk, - properties={"$browser": "Positive"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_fail", - timestamp="2020-01-02T14:00:00Z", - ) - - # One Negative with success - _create_person( - distinct_ids=[f"user_succ"], - team_id=self.team.pk, - properties={"$browser": "Negative"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_succ", - timestamp="2020-01-02T14:00:00Z", - ) - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_succ", - timestamp="2020-01-04T14:00:00Z", - ) - - result = correlation._run()[0] - - odds_ratios = [item.pop("odds_ratio") for item in result] # type: ignore - - # Success Total = 11, Failure Total = 11 - # - # Browser::Positive - # Success: 10 - # Failure: 1 - - # Browser::Negative - # Success: 1 - # Failure: 10 - - prior_count = 1 - expected_odds_ratios = [ - ((10 + prior_count) / (1 + prior_count)) * ((11 - 1 + prior_count) / (11 - 10 + prior_count)), - ((1 + prior_count) / (10 + prior_count)) * ((11 - 10 + prior_count) / (11 - 1 + prior_count)), - ] - - for odds, expected_odds in zip(odds_ratios, expected_odds_ratios): - self.assertAlmostEqual(odds, expected_odds) - - self.assertEqual( - result, - [ - { - "event": "$browser::Positive", - "success_count": 10, - "failure_count": 1, - # "odds_ratio": 121/4, - "correlation_type": "success", - }, - { - "event": "$browser::Negative", - "success_count": 1, - "failure_count": 10, - # "odds_ratio": 4/121, - "correlation_type": "failure", - 
}, - ], - ) - - self.assertEqual( - len(self._get_actors_for_property(filter, [("$browser", "Positive", "person", None)])), - 10, - ) - self.assertEqual( - len(self._get_actors_for_property(filter, [("$browser", "Positive", "person", None)], False)), - 1, - ) - self.assertEqual( - len(self._get_actors_for_property(filter, [("$browser", "Negative", "person", None)])), - 1, - ) - self.assertEqual( - len(self._get_actors_for_property(filter, [("$browser", "Negative", "person", None)], False)), - 10, - ) - - # TODO: Delete this test when moved to person-on-events - @also_test_with_materialized_columns( - event_properties=[], person_properties=["$browser"], verify_no_jsonextract=False - ) - @snapshot_clickhouse_queries - def test_funnel_correlation_with_properties_and_groups(self): - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="organization", group_type_index=0 - ) - - for i in range(10): - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key=f"org:{i}", - properties={"industry": "positive"}, - ) - _create_person( - distinct_ids=[f"user_{i}"], - team_id=self.team.pk, - properties={"$browser": "Positive"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - properties={"$group_0": f"org:{i}"}, - ) - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_{i}", - timestamp="2020-01-04T14:00:00Z", - properties={"$group_0": f"org:{i}"}, - ) - - for i in range(10, 20): - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key=f"org:{i}", - properties={"industry": "negative"}, - ) - _create_person( - distinct_ids=[f"user_{i}"], - team_id=self.team.pk, - properties={"$browser": "Negative"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - properties={"$group_0": f"org:{i}"}, - ) - if i % 2 == 0: - _create_event( - team=self.team, - event="negatively_related", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - properties={"$group_0": f"org:{i}"}, - ) - - # One Positive with failure - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key=f"org:fail", - properties={"industry": "positive"}, - ) - _create_person( - distinct_ids=[f"user_fail"], - team_id=self.team.pk, - properties={"$browser": "Positive"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_fail", - timestamp="2020-01-02T14:00:00Z", - properties={"$group_0": f"org:fail"}, - ) - - # One Negative with success - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key=f"org:succ", - properties={"industry": "negative"}, - ) - _create_person( - distinct_ids=[f"user_succ"], - team_id=self.team.pk, - properties={"$browser": "Negative"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_succ", - timestamp="2020-01-02T14:00:00Z", - properties={"$group_0": f"org:succ"}, - ) - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_succ", - timestamp="2020-01-04T14:00:00Z", - properties={"$group_0": f"org:succ"}, - ) - - filters = { - "events": [ - {"id": "user signed up", "type": "events", "order": 0}, - {"id": "paid", "type": "events", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "funnel_correlation_type": "properties", - "funnel_correlation_names": ["industry"], - "aggregation_group_type_index": 0, - } - - 
filter = Filter(data=filters) - correlation = FunnelCorrelation(filter, self.team) - result = correlation._run()[0] - - odds_ratios = [item.pop("odds_ratio") for item in result] # type: ignore - - # Success Total = 11, Failure Total = 11 - # - # Industry::Positive - # Success: 10 - # Failure: 1 - - # Industry::Negative - # Success: 1 - # Failure: 10 - - prior_count = 1 - expected_odds_ratios = [ - ((10 + prior_count) / (1 + prior_count)) * ((11 - 1 + prior_count) / (11 - 10 + prior_count)), - ((1 + prior_count) / (10 + prior_count)) * ((11 - 10 + prior_count) / (11 - 1 + prior_count)), - ] - - for odds, expected_odds in zip(odds_ratios, expected_odds_ratios): - self.assertAlmostEqual(odds, expected_odds) - - self.assertEqual( - result, - [ - { - "event": "industry::positive", - "success_count": 10, - "failure_count": 1, - # "odds_ratio": 121/4, - "correlation_type": "success", - }, - { - "event": "industry::negative", - "success_count": 1, - "failure_count": 10, - # "odds_ratio": 4/121, - "correlation_type": "failure", - }, - ], - ) - - self.assertEqual( - len(self._get_actors_for_property(filter, [("industry", "positive", "group", 0)])), - 10, - ) - self.assertEqual( - len(self._get_actors_for_property(filter, [("industry", "positive", "group", 0)], False)), - 1, - ) - self.assertEqual( - len(self._get_actors_for_property(filter, [("industry", "negative", "group", 0)])), - 1, - ) - self.assertEqual( - len(self._get_actors_for_property(filter, [("industry", "negative", "group", 0)], False)), - 10, - ) - - # test with `$all` as property - # _run property correlation with filter on all properties - filter = filter.shallow_clone({"funnel_correlation_names": ["$all"]}) - correlation = FunnelCorrelation(filter, self.team) - - new_result = correlation._run()[0] - - odds_ratios = [item.pop("odds_ratio") for item in new_result] # type: ignore - - for odds, expected_odds in zip(odds_ratios, expected_odds_ratios): - self.assertAlmostEqual(odds, expected_odds) - - self.assertEqual(new_result, result) - - @also_test_with_materialized_columns( - event_properties=[], - person_properties=["$browser"], - verify_no_jsonextract=False, - ) - @also_test_with_person_on_events_v2 - @snapshot_clickhouse_queries - def test_funnel_correlation_with_properties_and_groups_person_on_events(self): - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="organization", group_type_index=0 - ) - - for i in range(10): - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key=f"org:{i}", - properties={"industry": "positive"}, - ) - _create_person( - distinct_ids=[f"user_{i}"], - team_id=self.team.pk, - properties={"$browser": "Positive"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - properties={"$group_0": f"org:{i}"}, - ) - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_{i}", - timestamp="2020-01-04T14:00:00Z", - properties={"$group_0": f"org:{i}"}, - ) - - for i in range(10, 20): - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key=f"org:{i}", - properties={"industry": "negative"}, - ) - _create_person( - distinct_ids=[f"user_{i}"], - team_id=self.team.pk, - properties={"$browser": "Negative"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - properties={"$group_0": f"org:{i}"}, - ) - if i % 2 == 0: - _create_event( - team=self.team, - 
event="negatively_related", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - properties={"$group_0": f"org:{i}"}, - ) - - # One Positive with failure - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key=f"org:fail", - properties={"industry": "positive"}, - ) - _create_person( - distinct_ids=[f"user_fail"], - team_id=self.team.pk, - properties={"$browser": "Positive"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_fail", - timestamp="2020-01-02T14:00:00Z", - properties={"$group_0": f"org:fail"}, - ) - - # One Negative with success - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key=f"org:succ", - properties={"industry": "negative"}, - ) - _create_person( - distinct_ids=[f"user_succ"], - team_id=self.team.pk, - properties={"$browser": "Negative"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_succ", - timestamp="2020-01-02T14:00:00Z", - properties={"$group_0": f"org:succ"}, - ) - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_succ", - timestamp="2020-01-04T14:00:00Z", - properties={"$group_0": f"org:succ"}, - ) - - filters = { - "events": [ - {"id": "user signed up", "type": "events", "order": 0}, - {"id": "paid", "type": "events", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "funnel_correlation_type": "properties", - "funnel_correlation_names": ["industry"], - "aggregation_group_type_index": 0, - } - - with override_instance_config("PERSON_ON_EVENTS_ENABLED", True): - filter = Filter(data=filters) - correlation = FunnelCorrelation(filter, self.team) - result = correlation._run()[0] - - odds_ratios = [item.pop("odds_ratio") for item in result] # type: ignore - - # Success Total = 11, Failure Total = 11 - # - # Industry::Positive - # Success: 10 - # Failure: 1 - - # Industry::Negative - # Success: 1 - # Failure: 10 - - prior_count = 1 - expected_odds_ratios = [ - ((10 + prior_count) / (1 + prior_count)) * ((11 - 1 + prior_count) / (11 - 10 + prior_count)), - ((1 + prior_count) / (10 + prior_count)) * ((11 - 10 + prior_count) / (11 - 1 + prior_count)), - ] - - for odds, expected_odds in zip(odds_ratios, expected_odds_ratios): - self.assertAlmostEqual(odds, expected_odds) - - self.assertEqual( - result, - [ - { - "event": "industry::positive", - "success_count": 10, - "failure_count": 1, - # "odds_ratio": 121/4, - "correlation_type": "success", - }, - { - "event": "industry::negative", - "success_count": 1, - "failure_count": 10, - # "odds_ratio": 4/121, - "correlation_type": "failure", - }, - ], - ) - - self.assertEqual( - len(self._get_actors_for_property(filter, [("industry", "positive", "group", 0)])), - 10, - ) - self.assertEqual( - len(self._get_actors_for_property(filter, [("industry", "positive", "group", 0)], False)), - 1, - ) - self.assertEqual( - len(self._get_actors_for_property(filter, [("industry", "negative", "group", 0)])), - 1, - ) - self.assertEqual( - len(self._get_actors_for_property(filter, [("industry", "negative", "group", 0)], False)), - 10, - ) - - # test with `$all` as property - # _run property correlation with filter on all properties - filter = filter.shallow_clone({"funnel_correlation_names": ["$all"]}) - correlation = FunnelCorrelation(filter, self.team) - - new_result = correlation._run()[0] - - odds_ratios = [item.pop("odds_ratio") for item in new_result] # type: ignore - - for odds, expected_odds in zip(odds_ratios, expected_odds_ratios): - 
self.assertAlmostEqual(odds, expected_odds) - - self.assertEqual(new_result, result) - - def test_no_divide_by_zero_errors(self): - filters = { - "events": [ - {"id": "user signed up", "type": "events", "order": 0}, - {"id": "paid", "type": "events", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-14", - } - - filter = Filter(data=filters) - correlation = FunnelCorrelation(filter, self.team) - - for i in range(2): - _create_person( - distinct_ids=[f"user_{i}"], - team_id=self.team.pk, - properties={"$browser": "Positive"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - ) - # failure count for this event is 0 - _create_event( - team=self.team, - event="positive", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - ) - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_{i}", - timestamp="2020-01-04T14:00:00Z", - ) - - for i in range(2, 4): - _create_person( - distinct_ids=[f"user_{i}"], - team_id=self.team.pk, - properties={"$browser": "Negative"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - ) - if i % 2 == 0: - # success count for this event is 0 - _create_event( - team=self.team, - event="negatively_related", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - ) - - results = correlation._run() - self.assertFalse(results[1]) - - result = results[0] - - odds_ratios = [item.pop("odds_ratio") for item in result] # type: ignore - expected_odds_ratios = [9, 1 / 3] - - for odds, expected_odds in zip(odds_ratios, expected_odds_ratios): - self.assertAlmostEqual(odds, expected_odds) - - self.assertEqual( - result, - [ - { - "event": "positive", - "success_count": 2, - "failure_count": 0, - # "odds_ratio": 9.0, - "correlation_type": "success", - }, - { - "event": "negatively_related", - "success_count": 0, - "failure_count": 1, - # "odds_ratio": 1 / 3, - "correlation_type": "failure", - }, - ], - ) - - def test_correlation_with_properties_raises_validation_error(self): - filters = { - "events": [ - {"id": "user signed up", "type": "events", "order": 0}, - {"id": "paid", "type": "events", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "funnel_correlation_type": "properties", - # "funnel_correlation_names": ["$browser"], missing value - } - - filter = Filter(data=filters) - correlation = FunnelCorrelation(filter, self.team) - - _create_person( - distinct_ids=[f"user_1"], - team_id=self.team.pk, - properties={"$browser": "Positive"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_1", - timestamp="2020-01-02T14:00:00Z", - ) - _create_event( - team=self.team, - event="rick", - distinct_id=f"user_1", - timestamp="2020-01-03T14:00:00Z", - ) - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_1", - timestamp="2020-01-04T14:00:00Z", - ) - flush_persons_and_events() - - with self.assertRaises(ValidationError): - correlation._run() - - filter = filter.shallow_clone({"funnel_correlation_type": "event_with_properties"}) - # missing "funnel_correlation_event_names": ["rick"], - with self.assertRaises(ValidationError): - FunnelCorrelation(filter, self.team)._run() - - @also_test_with_materialized_columns( - event_properties=[], person_properties=["$browser"], verify_no_jsonextract=False - ) - def 
test_correlation_with_multiple_properties(self): - filters = { - "events": [ - {"id": "user signed up", "type": "events", "order": 0}, - {"id": "paid", "type": "events", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "funnel_correlation_type": "properties", - "funnel_correlation_names": ["$browser", "$nice"], - } - - filter = Filter(data=filters) - correlation = FunnelCorrelation(filter, self.team) - - # Β 5 successful people with both properties - for i in range(5): - _create_person( - distinct_ids=[f"user_{i}"], - team_id=self.team.pk, - properties={"$browser": "Positive", "$nice": "very"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - ) - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_{i}", - timestamp="2020-01-04T14:00:00Z", - ) - - # Β 10 successful people with some different properties - for i in range(5, 15): - _create_person( - distinct_ids=[f"user_{i}"], - team_id=self.team.pk, - properties={"$browser": "Positive", "$nice": "not"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - ) - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_{i}", - timestamp="2020-01-04T14:00:00Z", - ) - - # 5 Unsuccessful people with some common properties - for i in range(15, 20): - _create_person( - distinct_ids=[f"user_{i}"], - team_id=self.team.pk, - properties={"$browser": "Negative", "$nice": "smh"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - ) - - # One Positive with failure, no $nice property - _create_person( - distinct_ids=[f"user_fail"], - team_id=self.team.pk, - properties={"$browser": "Positive"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_fail", - timestamp="2020-01-02T14:00:00Z", - ) - - # One Negative with success, no $nice property - _create_person( - distinct_ids=[f"user_succ"], - team_id=self.team.pk, - properties={"$browser": "Negative"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_succ", - timestamp="2020-01-02T14:00:00Z", - ) - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_succ", - timestamp="2020-01-04T14:00:00Z", - ) - - result = correlation._run()[0] - - # Success Total = 5 + 10 + 1 = 16 - # Failure Total = 5 + 1 = 6 - # Add 1 for priors - - odds_ratios = [item.pop("odds_ratio") for item in result] # type: ignore - expected_odds_ratios = [ - (16 / 2) * ((7 - 1) / (17 - 15)), - (11 / 1) * ((7 - 0) / (17 - 10)), - (6 / 1) * ((7 - 0) / (17 - 5)), - (1 / 6) * ((7 - 5) / (17 - 0)), - (2 / 6) * ((7 - 5) / (17 - 1)), - (2 / 2) * ((7 - 1) / (17 - 1)), - ] - # (success + 1) / (failure + 1) - - for odds, expected_odds in zip(odds_ratios, expected_odds_ratios): - self.assertAlmostEqual(odds, expected_odds) - - expected_result = [ - { - "event": "$browser::Positive", - "success_count": 15, - "failure_count": 1, - # "odds_ratio": 24, - "correlation_type": "success", - }, - { - "event": "$nice::not", - "success_count": 10, - "failure_count": 0, - # "odds_ratio": 11, - "correlation_type": "success", - }, - { - "event": "$nice::very", - "success_count": 5, - "failure_count": 0, - # "odds_ratio": 3.5, - "correlation_type": "success", - }, - { - "event": "$nice::smh", - "success_count": 0, - "failure_count": 5, - # "odds_ratio": 
0.0196078431372549, - "correlation_type": "failure", - }, - { - "event": "$browser::Negative", - "success_count": 1, - "failure_count": 5, - # "odds_ratio": 0.041666666666666664, - "correlation_type": "failure", - }, - { - "event": "$nice::", - "success_count": 1, - "failure_count": 1, - # "odds_ratio": 0.375, - "correlation_type": "failure", - }, - ] - - self.assertEqual(result, expected_result) - - # _run property correlation with filter on all properties - filter = filter.shallow_clone({"funnel_correlation_names": ["$all"]}) - correlation = FunnelCorrelation(filter, self.team) - - new_result = correlation._run()[0] - - odds_ratios = [item.pop("odds_ratio") for item in new_result] # type: ignore - - new_expected_odds_ratios = expected_odds_ratios[:-1] - new_expected_result = expected_result[:-1] - # When querying all properties, we don't consider properties that don't exist for part of the data - # since users aren't explicitly asking for that property. Thus, - # We discard $nice:: because it's an empty result set - - for odds, expected_odds in zip(odds_ratios, new_expected_odds_ratios): - self.assertAlmostEqual(odds, expected_odds) - - self.assertEqual(new_result, new_expected_result) - - filter = filter.shallow_clone({"funnel_correlation_exclude_names": ["$browser"]}) - # search for $all but exclude $browser - correlation = FunnelCorrelation(filter, self.team) - - new_result = correlation._run()[0] - odds_ratios = [item.pop("odds_ratio") for item in new_result] # type: ignore - - new_expected_odds_ratios = expected_odds_ratios[1:4] # choosing the $nice property values - new_expected_result = expected_result[1:4] - - for odds, expected_odds in zip(odds_ratios, new_expected_odds_ratios): - self.assertAlmostEqual(odds, expected_odds) - - self.assertEqual(new_result, new_expected_result) - - self.assertEqual( - len(self._get_actors_for_property(filter, [("$nice", "not", "person", None)])), - 10, - ) - self.assertEqual( - len(self._get_actors_for_property(filter, [("$nice", "", "person", None)], False)), - 1, - ) - self.assertEqual( - len(self._get_actors_for_property(filter, [("$nice", "very", "person", None)])), - 5, - ) - - def test_discarding_insignificant_events(self): - filters = { - "events": [ - {"id": "user signed up", "type": "events", "order": 0}, - {"id": "paid", "type": "events", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "funnel_correlation_type": "events", - } - - filter = Filter(data=filters) - correlation = FunnelCorrelation(filter, self.team) - - for i in range(10): - _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - ) - if i % 2 == 0: - _create_event( - team=self.team, - event="positively_related", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - ) - if i % 10 == 0: - _create_event( - team=self.team, - event="low_sig_positively_related", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:20:00Z", - ) - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_{i}", - timestamp="2020-01-04T14:00:00Z", - ) - - for i in range(10, 20): - _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - ) - if i % 2 == 0: - _create_event( - team=self.team, - event="negatively_related", - distinct_id=f"user_{i}", - 
timestamp="2020-01-03T14:00:00Z", - ) - if i % 5 == 0: - _create_event( - team=self.team, - event="low_sig_negatively_related", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - ) - - # Β Total 10 positive, 10 negative - # low sig count = 1 and 2, high sig count >= 5 - # Thus, to discard the low sig count, % needs to be >= 10%, or count >= 2 - - # Discard both due to % - FunnelCorrelation.MIN_PERSON_PERCENTAGE = 0.11 - FunnelCorrelation.MIN_PERSON_COUNT = 25 - result = correlation._run()[0] - self.assertEqual(len(result), 2) - - def test_events_within_conversion_window_for_correlation(self): - filters = { - "events": [ - {"id": "user signed up", "type": "events", "order": 0}, - {"id": "paid", "type": "events", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - "funnel_window_interval": "10", - "funnel_window_interval_unit": "minute", - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "funnel_correlation_type": "events", - } - - filter = Filter(data=filters) - correlation = FunnelCorrelation(filter, self.team) - - _create_person(distinct_ids=["user_successful"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - distinct_id="user_successful", - timestamp="2020-01-02T14:00:00Z", - ) - _create_event( - team=self.team, - event="positively_related", - distinct_id="user_successful", - timestamp="2020-01-02T14:02:00Z", - ) - _create_event( - team=self.team, - event="paid", - distinct_id="user_successful", - timestamp="2020-01-02T14:06:00Z", - ) - - _create_person(distinct_ids=["user_dropoff"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - distinct_id="user_dropoff", - timestamp="2020-01-02T14:00:00Z", - ) - _create_event( - team=self.team, - event="NOT_negatively_related", - distinct_id="user_dropoff", - timestamp="2020-01-02T14:15:00Z", # event happened outside conversion window - ) - - result = correlation._run()[0] - - odds_ratios = [item.pop("odds_ratio") for item in result] # type: ignore - expected_odds_ratios = [4] - - for odds, expected_odds in zip(odds_ratios, expected_odds_ratios): - self.assertAlmostEqual(odds, expected_odds) - - self.assertEqual( - result, - [ - { - "event": "positively_related", - "success_count": 1, - "failure_count": 0, - # "odds_ratio": 4.0, - "correlation_type": "success", - } - ], - ) - - @also_test_with_materialized_columns(["blah", "signup_source"], verify_no_jsonextract=False) - def test_funnel_correlation_with_event_properties(self): - filters = { - "events": [ - {"id": "user signed up", "type": "events", "order": 0}, - {"id": "paid", "type": "events", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "funnel_correlation_type": "event_with_properties", - "funnel_correlation_event_names": [ - "positively_related", - "negatively_related", - ], - } - - filter = Filter(data=filters) - correlation = FunnelCorrelation(filter, self.team) - - for i in range(10): - _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - ) - if i % 2 == 0: - _create_event( - team=self.team, - event="positively_related", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - properties={ - "signup_source": "facebook" if i % 4 == 0 else "email", - "blah": "value_bleh", - }, - ) - # source: email occurs only twice, so would be discarded from result set - _create_event( - team=self.team, - 
event="paid", - distinct_id=f"user_{i}", - timestamp="2020-01-04T14:00:00Z", - ) - - for i in range(10, 20): - _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - ) - if i % 2 == 0: - _create_event( - team=self.team, - event="negatively_related", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - properties={"signup_source": "shazam" if i % 6 == 0 else "email"}, - ) - # source: shazam occurs only once, so would be discarded from result set - - result = correlation._run()[0] - - odds_ratios = [item.pop("odds_ratio") for item in result] # type: ignore - expected_odds_ratios = [11, 5.5, 2 / 11] - - for odds, expected_odds in zip(odds_ratios, expected_odds_ratios): - self.assertAlmostEqual(odds, expected_odds) - - self.assertEqual( - result, - [ - { - "event": "positively_related::blah::value_bleh", - "success_count": 5, - "failure_count": 0, - # "odds_ratio": 11.0, - "correlation_type": "success", - }, - { - "event": "positively_related::signup_source::facebook", - "success_count": 3, - "failure_count": 0, - # "odds_ratio": 5.5, - "correlation_type": "success", - }, - { - "event": "negatively_related::signup_source::email", - "success_count": 0, - "failure_count": 3, - # "odds_ratio": 0.18181818181818182, - "correlation_type": "failure", - }, - ], - ) - - self.assertEqual( - len(self._get_actors_for_event(filter, "positively_related", {"blah": "value_bleh"})), - 5, - ) - self.assertEqual( - len(self._get_actors_for_event(filter, "positively_related", {"signup_source": "facebook"})), - 3, - ) - self.assertEqual( - len(self._get_actors_for_event(filter, "positively_related", {"signup_source": "facebook"}, False)), - 0, - ) - self.assertEqual( - len(self._get_actors_for_event(filter, "negatively_related", {"signup_source": "email"}, False)), - 3, - ) - - @also_test_with_materialized_columns(["blah", "signup_source"], verify_no_jsonextract=False) - @snapshot_clickhouse_queries - def test_funnel_correlation_with_event_properties_and_groups(self): - # same test as test_funnel_correlation_with_event_properties but with events attached to groups - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="organization", group_type_index=1 - ) - - for i in range(10): - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key=f"org:{i}", - properties={"industry": "positive"}, - ) - _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - properties={"$group_1": f"org:{i}"}, - ) - if i % 2 == 0: - _create_event( - team=self.team, - event="positively_related", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - properties={ - "signup_source": "facebook" if i % 4 == 0 else "email", - "blah": "value_bleh", - "$group_1": f"org:{i}", - }, - ) - # source: email occurs only twice, so would be discarded from result set - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_{i}", - timestamp="2020-01-04T14:00:00Z", - properties={"$group_1": f"org:{i}"}, - ) - - for i in range(10, 20): - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key=f"org:{i}", - properties={"industry": "positive"}, - ) - _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - 
distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - properties={"$group_1": f"org:{i}"}, - ) - if i % 2 == 0: - _create_event( - team=self.team, - event="negatively_related", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - properties={ - "signup_source": "shazam" if i % 6 == 0 else "email", - "$group_1": f"org:{i}", - }, - ) - # source: shazam occurs only once, so would be discarded from result set - - filters = { - "events": [ - {"id": "user signed up", "type": "events", "order": 0}, - {"id": "paid", "type": "events", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "aggregation_group_type_index": 1, - "funnel_correlation_type": "event_with_properties", - "funnel_correlation_event_names": [ - "positively_related", - "negatively_related", - ], - } - - filter = Filter(data=filters) - correlation = FunnelCorrelation(filter, self.team) - result = correlation._run()[0] - - odds_ratios = [item.pop("odds_ratio") for item in result] # type: ignore - expected_odds_ratios = [11, 5.5, 2 / 11] - - for odds, expected_odds in zip(odds_ratios, expected_odds_ratios): - self.assertAlmostEqual(odds, expected_odds) - - self.assertEqual( - result, - [ - { - "event": "positively_related::blah::value_bleh", - "success_count": 5, - "failure_count": 0, - # "odds_ratio": 11.0, - "correlation_type": "success", - }, - { - "event": "positively_related::signup_source::facebook", - "success_count": 3, - "failure_count": 0, - # "odds_ratio": 5.5, - "correlation_type": "success", - }, - { - "event": "negatively_related::signup_source::email", - "success_count": 0, - "failure_count": 3, - # "odds_ratio": 0.18181818181818182, - "correlation_type": "failure", - }, - ], - ) - - def test_funnel_correlation_with_event_properties_exclusions(self): - filters = { - "events": [ - {"id": "user signed up", "type": "events", "order": 0}, - {"id": "paid", "type": "events", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "funnel_correlation_type": "event_with_properties", - "funnel_correlation_event_names": ["positively_related"], - "funnel_correlation_event_exclude_property_names": ["signup_source"], - } - - filter = Filter(data=filters) - correlation = FunnelCorrelation(filter, self.team) - - # Need more than 2 events to get a correlation - for i in range(3): - _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - ) - _create_event( - team=self.team, - event="positively_related", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - properties={"signup_source": "facebook", "blah": "value_bleh"}, - ) - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_{i}", - timestamp="2020-01-04T14:00:00Z", - ) - - # Atleast one person that fails, to ensure we get results - _create_person(distinct_ids=[f"user_fail"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_fail", - timestamp="2020-01-02T14:00:00Z", - ) - - result = correlation._run()[0] - self.assertEqual( - result, - [ - { - "event": "positively_related::blah::value_bleh", - "success_count": 3, - "failure_count": 0, - "odds_ratio": 8, - "correlation_type": "success", - }, - # Β missing signup_source, as expected - ], - ) - - self.assertEqual( - len(self._get_actors_for_event(filter, "positively_related", {"blah": "value_bleh"})), 
- 3, - ) - - # If you search for persons with a specific property, even if excluded earlier, you should get them - self.assertEqual( - len(self._get_actors_for_event(filter, "positively_related", {"signup_source": "facebook"})), - 3, - ) - - @also_test_with_materialized_columns(["$event_type", "signup_source"]) - def test_funnel_correlation_with_event_properties_autocapture(self): - filters = { - "events": [ - {"id": "user signed up", "type": "events", "order": 0}, - {"id": "paid", "type": "events", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "funnel_correlation_type": "event_with_properties", - "funnel_correlation_event_names": ["$autocapture"], - } - - filter = Filter(data=filters) - correlation = FunnelCorrelation(filter, self.team) - - # Need a minimum of 3 hits to get a correlation result - for i in range(6): - _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - ) - _create_event( - team=self.team, - event="$autocapture", - distinct_id=f"user_{i}", - elements=[Element(nth_of_type=1, nth_child=0, tag_name="a", href="/movie")], - timestamp="2020-01-03T14:00:00Z", - properties={"signup_source": "email", "$event_type": "click"}, - ) - # Test two different types of autocapture elements, with different counts, so we can accurately test results - if i % 2 == 0: - _create_event( - team=self.team, - event="$autocapture", - distinct_id=f"user_{i}", - elements=[ - Element( - nth_of_type=1, - nth_child=0, - tag_name="button", - text="Pay $10", - ) - ], - timestamp="2020-01-03T14:00:00Z", - properties={"signup_source": "facebook", "$event_type": "submit"}, - ) - - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_{i}", - timestamp="2020-01-04T14:00:00Z", - ) - - # Atleast one person that fails, to ensure we get results - _create_person(distinct_ids=[f"user_fail"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_fail", - timestamp="2020-01-02T14:00:00Z", - ) - - result = correlation._run()[0] - - # $autocapture results only return elements chain - self.assertEqual( - result, - [ - { - "event": '$autocapture::elements_chain::click__~~__a:href="/movie"nth-child="0"nth-of-type="1"', - "success_count": 6, - "failure_count": 0, - "odds_ratio": 14.0, - "correlation_type": "success", - }, - { - "event": '$autocapture::elements_chain::submit__~~__button:nth-child="0"nth-of-type="1"text="Pay $10"', - "success_count": 3, - "failure_count": 0, - "odds_ratio": 2.0, - "correlation_type": "success", - }, - ], - ) - - self.assertEqual( - len(self._get_actors_for_event(filter, "$autocapture", {"signup_source": "facebook"})), - 3, - ) - self.assertEqual( - len(self._get_actors_for_event(filter, "$autocapture", {"$event_type": "click"})), - 6, - ) - self.assertEqual( - len( - self._get_actors_for_event( - filter, - "$autocapture", - [ - { - "key": "tag_name", - "operator": "exact", - "type": "element", - "value": "button", - }, - { - "key": "text", - "operator": "exact", - "type": "element", - "value": "Pay $10", - }, - ], - ) - ), - 3, - ) - self.assertEqual( - len( - self._get_actors_for_event( - filter, - "$autocapture", - [ - { - "key": "tag_name", - "operator": "exact", - "type": "element", - "value": "a", - }, - { - "key": "href", - "operator": "exact", - "type": "element", - "value": "/movie", - }, - ], - ) - ), - 6, - ) - - -class 
TestCorrelationFunctions(unittest.TestCase): - def test_are_results_insignificant(self): - # Same setup as above test: test_discarding_insignificant_events - contingency_tables = [ - EventContingencyTable( - event="negatively_related", - visited=EventStats(success_count=0, failure_count=5), - success_total=10, - failure_total=10, - ), - EventContingencyTable( - event="positively_related", - visited=EventStats(success_count=5, failure_count=0), - success_total=10, - failure_total=10, - ), - EventContingencyTable( - event="low_sig_negatively_related", - visited=EventStats(success_count=0, failure_count=2), - success_total=10, - failure_total=10, - ), - EventContingencyTable( - event="low_sig_positively_related", - visited=EventStats(success_count=1, failure_count=0), - success_total=10, - failure_total=10, - ), - ] - - # Discard both low_sig due to % - FunnelCorrelation.MIN_PERSON_PERCENTAGE = 0.11 - FunnelCorrelation.MIN_PERSON_COUNT = 25 - result = [ - 1 - for contingency_table in contingency_tables - if not FunnelCorrelation.are_results_insignificant(contingency_table) - ] - self.assertEqual(len(result), 2) - - # Discard one low_sig due to % - FunnelCorrelation.MIN_PERSON_PERCENTAGE = 0.051 - FunnelCorrelation.MIN_PERSON_COUNT = 25 - result = [ - 1 - for contingency_table in contingency_tables - if not FunnelCorrelation.are_results_insignificant(contingency_table) - ] - self.assertEqual(len(result), 3) - - # Discard both due to count - FunnelCorrelation.MIN_PERSON_PERCENTAGE = 0.5 - FunnelCorrelation.MIN_PERSON_COUNT = 3 - result = [ - 1 - for contingency_table in contingency_tables - if not FunnelCorrelation.are_results_insignificant(contingency_table) - ] - self.assertEqual(len(result), 2) - - # Discard one due to count - FunnelCorrelation.MIN_PERSON_PERCENTAGE = 0.5 - FunnelCorrelation.MIN_PERSON_COUNT = 2 - result = [ - 1 - for contingency_table in contingency_tables - if not FunnelCorrelation.are_results_insignificant(contingency_table) - ] - self.assertEqual(len(result), 3) - - # Discard everything due to % - FunnelCorrelation.MIN_PERSON_PERCENTAGE = 0.5 - FunnelCorrelation.MIN_PERSON_COUNT = 100 - result = [ - 1 - for contingency_table in contingency_tables - if not FunnelCorrelation.are_results_insignificant(contingency_table) - ] - self.assertEqual(len(result), 0) - - # Discard everything due to count - FunnelCorrelation.MIN_PERSON_PERCENTAGE = 0.5 - FunnelCorrelation.MIN_PERSON_COUNT = 6 - result = [ - 1 - for contingency_table in contingency_tables - if not FunnelCorrelation.are_results_insignificant(contingency_table) - ] - self.assertEqual(len(result), 0) diff --git a/ee/clickhouse/queries/funnels/test/test_funnel_correlations_persons.py b/ee/clickhouse/queries/funnels/test/test_funnel_correlations_persons.py deleted file mode 100644 index c6954e15ee..0000000000 --- a/ee/clickhouse/queries/funnels/test/test_funnel_correlations_persons.py +++ /dev/null @@ -1,651 +0,0 @@ -import urllib.parse -from datetime import datetime, timedelta -from unittest.mock import patch -from uuid import UUID - -from django.utils import timezone -from freezegun import freeze_time - -from ee.clickhouse.queries.funnels.funnel_correlation_persons import ( - FunnelCorrelationActors, -) -from posthog.constants import INSIGHT_FUNNELS -from posthog.models import Cohort, Filter -from posthog.models.person import Person -from posthog.session_recordings.queries.test.session_replay_sql import ( - produce_replay_summary, -) -from posthog.tasks.calculate_cohort import insert_cohort_from_insight_filter -from 
posthog.test.base import ( - APIBaseTest, - ClickhouseTestMixin, - _create_event, - _create_person, - snapshot_clickhouse_queries, -) -from posthog.test.test_journeys import journeys_for - -FORMAT_TIME = "%Y-%m-%d 00:00:00" -MAX_STEP_COLUMN = 0 -COUNT_COLUMN = 1 -PERSON_ID_COLUMN = 2 - - -class TestClickhouseFunnelCorrelationsActors(ClickhouseTestMixin, APIBaseTest): - maxDiff = None - - def _setup_basic_test(self): - filters = { - "events": [ - {"id": "user signed up", "type": "events", "order": 0}, - {"id": "paid", "type": "events", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "funnel_correlation_type": "events", - } - - filter = Filter(data=filters) - - success_target_persons = [] - failure_target_persons = [] - events_by_person = {} - for i in range(10): - person_id = f"user_{i}" - person = _create_person(distinct_ids=[person_id], team_id=self.team.pk) - events_by_person[person_id] = [{"event": "user signed up", "timestamp": datetime(2020, 1, 2, 14)}] - - if i % 2 == 0: - events_by_person[person_id].append( - { - "event": "positively_related", - "timestamp": datetime(2020, 1, 3, 14), - } - ) - - success_target_persons.append(str(person.uuid)) - - events_by_person[person_id].append({"event": "paid", "timestamp": datetime(2020, 1, 4, 14)}) - - for i in range(10, 20): - person_id = f"user_{i}" - person = _create_person(distinct_ids=[person_id], team_id=self.team.pk) - events_by_person[person_id] = [{"event": "user signed up", "timestamp": datetime(2020, 1, 2, 14)}] - if i % 2 == 0: - events_by_person[person_id].append( - { - "event": "negatively_related", - "timestamp": datetime(2020, 1, 3, 14), - } - ) - failure_target_persons.append(str(person.uuid)) - - # One positively_related as failure - person_fail_id = f"user_fail" - person_fail = _create_person(distinct_ids=[person_fail_id], team_id=self.team.pk) - events_by_person[person_fail_id] = [ - {"event": "user signed up", "timestamp": datetime(2020, 1, 2, 14)}, - {"event": "positively_related", "timestamp": datetime(2020, 1, 3, 14)}, - ] - - # One negatively_related as success - person_success_id = f"user_succ" - person_succ = _create_person(distinct_ids=[person_success_id], team_id=self.team.pk) - events_by_person[person_success_id] = [ - {"event": "user signed up", "timestamp": datetime(2020, 1, 2, 14)}, - {"event": "negatively_related", "timestamp": datetime(2020, 1, 3, 14)}, - {"event": "paid", "timestamp": datetime(2020, 1, 4, 14)}, - ] - journeys_for(events_by_person, self.team, create_people=False) - - return ( - filter, - success_target_persons, - failure_target_persons, - person_fail, - person_succ, - ) - - def test_basic_funnel_correlation_with_events(self): - ( - filter, - success_target_persons, - failure_target_persons, - person_fail, - person_succ, - ) = self._setup_basic_test() - - # test positively_related successes - filter = filter.shallow_clone( - { - "funnel_correlation_person_entity": { - "id": "positively_related", - "type": "events", - }, - "funnel_correlation_person_converted": "TrUe", - } - ) - _, serialized_actors, _ = FunnelCorrelationActors(filter, self.team).get_actors() - - self.assertCountEqual([str(val["id"]) for val in serialized_actors], success_target_persons) - - # test negatively_related failures - filter = filter.shallow_clone( - { - "funnel_correlation_person_entity": { - "id": "negatively_related", - "type": "events", - }, - "funnel_correlation_person_converted": "falsE", - } - ) - - _, serialized_actors, _ = 
FunnelCorrelationActors(filter, self.team).get_actors() - - self.assertCountEqual([str(val["id"]) for val in serialized_actors], failure_target_persons) - - # test positively_related failures - filter = filter.shallow_clone( - { - "funnel_correlation_person_entity": { - "id": "positively_related", - "type": "events", - }, - "funnel_correlation_person_converted": "False", - } - ) - _, serialized_actors, _ = FunnelCorrelationActors(filter, self.team).get_actors() - - self.assertCountEqual([str(val["id"]) for val in serialized_actors], [str(person_fail.uuid)]) - - # test negatively_related successes - filter = filter.shallow_clone( - { - "funnel_correlation_person_entity": { - "id": "negatively_related", - "type": "events", - }, - "funnel_correlation_person_converted": "trUE", - } - ) - _, serialized_actors, _ = FunnelCorrelationActors(filter, self.team).get_actors() - - self.assertCountEqual([str(val["id"]) for val in serialized_actors], [str(person_succ.uuid)]) - - # test all positively_related - filter = filter.shallow_clone( - { - "funnel_correlation_person_entity": { - "id": "positively_related", - "type": "events", - }, - "funnel_correlation_person_converted": None, - } - ) - _, serialized_actors, _ = FunnelCorrelationActors(filter, self.team).get_actors() - - self.assertCountEqual( - [str(val["id"]) for val in serialized_actors], - [*success_target_persons, str(person_fail.uuid)], - ) - - # test all negatively_related - filter = filter.shallow_clone( - { - "funnel_correlation_person_entity": { - "id": "negatively_related", - "type": "events", - }, - "funnel_correlation_person_converted": None, - } - ) - _, serialized_actors, _ = FunnelCorrelationActors(filter, self.team).get_actors() - - self.assertCountEqual( - [str(val["id"]) for val in serialized_actors], - [*failure_target_persons, str(person_succ.uuid)], - ) - - @patch("posthog.tasks.calculate_cohort.insert_cohort_from_insight_filter.delay") - def test_create_funnel_correlation_cohort(self, _insert_cohort_from_insight_filter): - ( - filter, - success_target_persons, - failure_target_persons, - person_fail, - person_succ, - ) = self._setup_basic_test() - - params = { - "events": [ - {"id": "user signed up", "type": "events", "order": 0}, - {"id": "paid", "type": "events", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "funnel_correlation_type": "events", - "funnel_correlation_person_entity": { - "id": "positively_related", - "type": "events", - }, - "funnel_correlation_person_converted": "TrUe", - } - - response = self.client.post( - f"/api/projects/{self.team.id}/cohorts/?{urllib.parse.urlencode(params)}", - {"name": "test", "is_static": True}, - ).json() - - cohort_id = response["id"] - - _insert_cohort_from_insight_filter.assert_called_once_with( - cohort_id, - { - "events": "[{'id': 'user signed up', 'type': 'events', 'order': 0}, {'id': 'paid', 'type': 'events', 'order': 1}]", - "insight": "FUNNELS", - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "funnel_correlation_type": "events", - "funnel_correlation_person_entity": "{'id': 'positively_related', 'type': 'events'}", - "funnel_correlation_person_converted": "TrUe", - }, - self.team.pk, - ) - - insert_cohort_from_insight_filter(cohort_id, params) - - cohort = Cohort.objects.get(pk=cohort_id) - people = Person.objects.filter(cohort__id=cohort.pk) - self.assertEqual(cohort.errors_calculating, 0) - self.assertEqual(people.count(), 5) - self.assertEqual(cohort.count, 5) - - def 
test_people_arent_returned_multiple_times(self): - people = journeys_for( - { - "user_1": [ - {"event": "user signed up", "timestamp": datetime(2020, 1, 2, 14)}, - { - "event": "positively_related", - "timestamp": datetime(2020, 1, 3, 14), - }, - # duplicate event - { - "event": "positively_related", - "timestamp": datetime(2020, 1, 3, 14), - }, - {"event": "paid", "timestamp": datetime(2020, 1, 4, 14)}, - ] - }, - self.team, - ) - - filter = Filter( - data={ - "events": [ - {"id": "user signed up", "type": "events", "order": 0}, - {"id": "paid", "type": "events", "order": 1}, - ], - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "funnel_correlation_type": "events", - "funnel_correlation_person_entity": { - "id": "positively_related", - "type": "events", - }, - "funnel_correlation_person_converted": "TrUe", - } - ) - _, serialized_actors, _ = FunnelCorrelationActors(filter, self.team).get_actors() - - self.assertCountEqual([str(val["id"]) for val in serialized_actors], [str(people["user_1"].uuid)]) - - @snapshot_clickhouse_queries - @freeze_time("2021-01-02 00:00:00.000Z") - def test_funnel_correlation_on_event_with_recordings(self): - p1 = _create_person(distinct_ids=["user_1"], team=self.team, properties={"foo": "bar"}) - _create_event( - event="$pageview", - distinct_id="user_1", - team=self.team, - timestamp=timezone.now(), - properties={"$session_id": "s2", "$window_id": "w1"}, - event_uuid="11111111-1111-1111-1111-111111111111", - ) - _create_event( - event="insight loaded", - distinct_id="user_1", - team=self.team, - timestamp=(timezone.now() + timedelta(minutes=2)), - properties={"$session_id": "s2", "$window_id": "w2"}, - event_uuid="31111111-1111-1111-1111-111111111111", - ) - _create_event( - event="insight analyzed", - distinct_id="user_1", - team=self.team, - timestamp=(timezone.now() + timedelta(minutes=3)), - properties={"$session_id": "s2", "$window_id": "w2"}, - event_uuid="21111111-1111-1111-1111-111111111111", - ) - - timestamp = datetime(2021, 1, 2, 0, 0, 0) - produce_replay_summary( - team_id=self.team.pk, - session_id="s2", - distinct_id="user_1", - first_timestamp=timestamp, - last_timestamp=timestamp, - ) - - # Success filter - filter = Filter( - data={ - "insight": INSIGHT_FUNNELS, - "date_from": "2021-01-01", - "date_to": "2021-01-08", - "funnel_correlation_type": "events", - "events": [ - {"id": "$pageview", "order": 0}, - {"id": "insight analyzed", "order": 1}, - ], - "include_recordings": "true", - "funnel_correlation_person_entity": { - "id": "insight loaded", - "type": "events", - }, - "funnel_correlation_person_converted": "True", - } - ) - _, results, _ = FunnelCorrelationActors(filter, self.team).get_actors() - - self.assertEqual(results[0]["id"], p1.uuid) - self.assertEqual( - results[0]["matched_recordings"], - [ - { - "events": [ - { - "timestamp": timezone.now() + timedelta(minutes=3), - "uuid": UUID("21111111-1111-1111-1111-111111111111"), - "window_id": "w2", - } - ], - "session_id": "s2", - } - ], - ) - - # Drop off filter - filter = Filter( - data={ - "insight": INSIGHT_FUNNELS, - "date_from": "2021-01-01", - "date_to": "2021-01-08", - "funnel_correlation_type": "events", - "events": [ - {"id": "$pageview", "order": 0}, - {"id": "insight analyzed", "order": 1}, - {"id": "insight updated", "order": 2}, - ], - "include_recordings": "true", - "funnel_correlation_person_entity": { - "id": "insight loaded", - "type": "events", - }, - "funnel_correlation_person_converted": "False", - } - ) - _, results, _ = 
FunnelCorrelationActors(filter, self.team).get_actors() - - self.assertEqual(results[0]["id"], p1.uuid) - self.assertEqual( - results[0]["matched_recordings"], - [ - { - "events": [ - { - "timestamp": timezone.now() + timedelta(minutes=3), - "uuid": UUID("21111111-1111-1111-1111-111111111111"), - "window_id": "w2", - } - ], - "session_id": "s2", - } - ], - ) - - @snapshot_clickhouse_queries - @freeze_time("2021-01-02 00:00:00.000Z") - def test_funnel_correlation_on_properties_with_recordings(self): - p1 = _create_person(distinct_ids=["user_1"], team=self.team, properties={"foo": "bar"}) - _create_event( - event="$pageview", - distinct_id="user_1", - team=self.team, - timestamp=timezone.now(), - properties={"$session_id": "s2", "$window_id": "w1"}, - event_uuid="11111111-1111-1111-1111-111111111111", - ) - _create_event( - event="insight analyzed", - distinct_id="user_1", - team=self.team, - timestamp=(timezone.now() + timedelta(minutes=3)), - properties={"$session_id": "s2", "$window_id": "w2"}, - event_uuid="21111111-1111-1111-1111-111111111111", - ) - - timestamp = datetime(2021, 1, 2, 0, 0, 0) - produce_replay_summary( - team_id=self.team.pk, - session_id="s2", - distinct_id="user_1", - first_timestamp=timestamp, - last_timestamp=timestamp, - ) - - # Success filter - filter = Filter( - data={ - "insight": INSIGHT_FUNNELS, - "date_from": "2021-01-01", - "date_to": "2021-01-08", - "funnel_correlation_type": "properties", - "events": [ - {"id": "$pageview", "order": 0}, - {"id": "insight analyzed", "order": 1}, - ], - "include_recordings": "true", - "funnel_correlation_property_values": [ - { - "key": "foo", - "value": "bar", - "operator": "exact", - "type": "person", - } - ], - "funnel_correlation_person_converted": "True", - } - ) - _, results, _ = FunnelCorrelationActors(filter, self.team).get_actors() - - self.assertEqual(results[0]["id"], p1.uuid) - self.assertEqual( - results[0]["matched_recordings"], - [ - { - "events": [ - { - "timestamp": timezone.now() + timedelta(minutes=3), - "uuid": UUID("21111111-1111-1111-1111-111111111111"), - "window_id": "w2", - } - ], - "session_id": "s2", - } - ], - ) - - @snapshot_clickhouse_queries - @freeze_time("2021-01-02 00:00:00.000Z") - def test_strict_funnel_correlation_with_recordings(self): - # First use that successfully completes the strict funnel - p1 = _create_person(distinct_ids=["user_1"], team=self.team, properties={"foo": "bar"}) - _create_event( - event="$pageview", - distinct_id="user_1", - team=self.team, - timestamp=timezone.now(), - properties={"$session_id": "s2", "$window_id": "w1"}, - event_uuid="11111111-1111-1111-1111-111111111111", - ) - _create_event( - event="insight analyzed", - distinct_id="user_1", - team=self.team, - timestamp=(timezone.now() + timedelta(minutes=3)), - properties={"$session_id": "s2", "$window_id": "w2"}, - event_uuid="31111111-1111-1111-1111-111111111111", - ) - _create_event( - event="insight analyzed", # Second event should not be returned - distinct_id="user_1", - team=self.team, - timestamp=(timezone.now() + timedelta(minutes=4)), - properties={"$session_id": "s2", "$window_id": "w2"}, - event_uuid="41111111-1111-1111-1111-111111111111", - ) - timestamp = datetime(2021, 1, 2, 0, 0, 0) - produce_replay_summary( - team_id=self.team.pk, - session_id="s2", - distinct_id="user_1", - first_timestamp=timestamp, - last_timestamp=timestamp, - ) - - # Second user with strict funnel drop off, but completed the step events for a normal funnel - p2 = _create_person(distinct_ids=["user_2"], team=self.team, 
properties={"foo": "bar"}) - _create_event( - event="$pageview", - distinct_id="user_2", - team=self.team, - timestamp=timezone.now(), - properties={"$session_id": "s3", "$window_id": "w1"}, - event_uuid="51111111-1111-1111-1111-111111111111", - ) - _create_event( - event="insight loaded", # Interupting event - distinct_id="user_2", - team=self.team, - timestamp=(timezone.now() + timedelta(minutes=3)), - properties={"$session_id": "s3", "$window_id": "w2"}, - event_uuid="61111111-1111-1111-1111-111111111111", - ) - _create_event( - event="insight analyzed", - distinct_id="user_2", - team=self.team, - timestamp=(timezone.now() + timedelta(minutes=4)), - properties={"$session_id": "s3", "$window_id": "w2"}, - event_uuid="71111111-1111-1111-1111-111111111111", - ) - timestamp1 = datetime(2021, 1, 2, 0, 0, 0) - produce_replay_summary( - team_id=self.team.pk, - session_id="s3", - distinct_id="user_2", - first_timestamp=timestamp1, - last_timestamp=timestamp1, - ) - - # Success filter - filter = Filter( - data={ - "insight": INSIGHT_FUNNELS, - "date_from": "2021-01-01", - "date_to": "2021-01-08", - "funnel_order_type": "strict", - "funnel_correlation_type": "properties", - "events": [ - {"id": "$pageview", "order": 0}, - {"id": "insight analyzed", "order": 1}, - ], - "include_recordings": "true", - "funnel_correlation_property_values": [ - { - "key": "foo", - "value": "bar", - "operator": "exact", - "type": "person", - } - ], - "funnel_correlation_person_converted": "True", - } - ) - _, results, _ = FunnelCorrelationActors(filter, self.team).get_actors() - - self.assertEqual(len(results), 1) - self.assertEqual(results[0]["id"], p1.uuid) - self.assertEqual( - results[0]["matched_recordings"], - [ - { - "events": [ - { - "timestamp": timezone.now() + timedelta(minutes=3), - "uuid": UUID("31111111-1111-1111-1111-111111111111"), - "window_id": "w2", - } - ], - "session_id": "s2", - } - ], - ) - - # Drop off filter - filter = Filter( - data={ - "insight": INSIGHT_FUNNELS, - "date_from": "2021-01-01", - "date_to": "2021-01-08", - "funnel_order_type": "strict", - "funnel_correlation_type": "properties", - "events": [ - {"id": "$pageview", "order": 0}, - {"id": "insight analyzed", "order": 1}, - ], - "include_recordings": "true", - "funnel_correlation_property_values": [ - { - "key": "foo", - "value": "bar", - "operator": "exact", - "type": "person", - } - ], - "funnel_correlation_person_converted": "False", - } - ) - _, results, _ = FunnelCorrelationActors(filter, self.team).get_actors() - - self.assertEqual(results[0]["id"], p2.uuid) - self.assertEqual( - results[0]["matched_recordings"], - [ - { - "events": [ - { - "timestamp": timezone.now(), - "uuid": UUID("51111111-1111-1111-1111-111111111111"), - "window_id": "w1", - } - ], - "session_id": "s3", - } - ], - ) diff --git a/ee/clickhouse/queries/groups_join_query.py b/ee/clickhouse/queries/groups_join_query.py deleted file mode 100644 index 5a48bc0d0e..0000000000 --- a/ee/clickhouse/queries/groups_join_query.py +++ /dev/null @@ -1,92 +0,0 @@ -from typing import Optional, Union - -from ee.clickhouse.queries.column_optimizer import EnterpriseColumnOptimizer -from posthog.models import Filter -from posthog.models.filters.path_filter import PathFilter -from posthog.models.filters.retention_filter import RetentionFilter -from posthog.models.filters.stickiness_filter import StickinessFilter -from posthog.models.filters.utils import GroupTypeIndex -from posthog.models.property.util import parse_prop_grouped_clauses -from posthog.queries.util import 
PersonPropertiesMode, alias_poe_mode_for_legacy -from posthog.schema import PersonsOnEventsMode - - -class GroupsJoinQuery: - """ - Query class responsible for joining with `groups` clickhouse table based on filters - """ - - _filter: Union[Filter, PathFilter, RetentionFilter, StickinessFilter] - _team_id: int - _column_optimizer: EnterpriseColumnOptimizer - - def __init__( - self, - filter: Union[Filter, PathFilter, RetentionFilter, StickinessFilter], - team_id: int, - column_optimizer: Optional[EnterpriseColumnOptimizer] = None, - join_key: Optional[str] = None, - person_on_events_mode: PersonsOnEventsMode = PersonsOnEventsMode.DISABLED, - ) -> None: - self._filter = filter - self._team_id = team_id - self._column_optimizer = column_optimizer or EnterpriseColumnOptimizer(self._filter, self._team_id) - self._join_key = join_key - self._person_on_events_mode = alias_poe_mode_for_legacy(person_on_events_mode) - - def get_join_query(self) -> tuple[str, dict]: - join_queries, params = [], {} - - for group_type_index in self._column_optimizer.group_types_to_query: - var = f"group_index_{group_type_index}" - group_join_key = self._join_key or f'"$group_{group_type_index}"' - join_queries.append( - f""" - LEFT JOIN ( - SELECT - group_key, - argMax(group_properties, _timestamp) AS group_properties_{group_type_index} - FROM groups - WHERE team_id = %(team_id)s AND group_type_index = %({var})s - GROUP BY group_key - ) groups_{group_type_index} - ON {group_join_key} == groups_{group_type_index}.group_key - """ - ) - - params["team_id"] = self._team_id - params[var] = group_type_index - - return "\n".join(join_queries), params - - def get_filter_query(self, group_type_index: GroupTypeIndex) -> tuple[str, dict]: - var = f"group_index_{group_type_index}" - params = { - "team_id": self._team_id, - var: group_type_index, - } - - aggregated_group_filters, filter_params = parse_prop_grouped_clauses( - self._team_id, - self._filter.property_groups, - prepend=f"group_properties_{group_type_index}", - has_person_id_joined=False, - group_properties_joined=True, - person_properties_mode=PersonPropertiesMode.DIRECT, - _top_level=True, - hogql_context=self._filter.hogql_context, - ) - - params.update(filter_params) - - query = f""" - SELECT - group_key, - argMax(group_properties, _timestamp) AS group_properties_{group_type_index} - FROM groups - WHERE team_id = %(team_id)s AND group_type_index = %({var})s - GROUP BY group_key - HAVING 1=1 - {aggregated_group_filters} - """ - return query, params diff --git a/ee/clickhouse/queries/related_actors_query.py b/ee/clickhouse/queries/related_actors_query.py deleted file mode 100644 index 99817998d7..0000000000 --- a/ee/clickhouse/queries/related_actors_query.py +++ /dev/null @@ -1,126 +0,0 @@ -from datetime import timedelta -from functools import cached_property -from typing import Optional, Union - -from django.utils.timezone import now - -from posthog.client import sync_execute -from posthog.models import Team -from posthog.models.filters.utils import validate_group_type_index -from posthog.models.group_type_mapping import GroupTypeMapping -from posthog.models.property import GroupTypeIndex -from posthog.queries.actor_base_query import ( - SerializedActor, - SerializedGroup, - SerializedPerson, - get_groups, - get_serialized_people, -) -from posthog.queries.person_distinct_id_query import get_team_distinct_ids_query - - -class RelatedActorsQuery: - DISTINCT_ID_TABLE_ALIAS = "pdi" - - """ - This query calculates other groups and persons that are related to a person or 
a group. - - Two actors are considered related if they have had shared events in the past 90 days. - """ - - def __init__( - self, - team: Team, - group_type_index: Optional[Union[GroupTypeIndex, str]], - id: str, - ): - self.team = team - self.group_type_index = validate_group_type_index("group_type_index", group_type_index) - self.id = id - - def run(self) -> list[SerializedActor]: - results: list[SerializedActor] = [] - results.extend(self._query_related_people()) - for group_type_mapping in GroupTypeMapping.objects.filter(project_id=self.team.project_id): - results.extend(self._query_related_groups(group_type_mapping.group_type_index)) - return results - - @property - def is_aggregating_by_groups(self) -> bool: - return self.group_type_index is not None - - def _query_related_people(self) -> list[SerializedPerson]: - if not self.is_aggregating_by_groups: - return [] - - # :KLUDGE: We need to fetch distinct_id + person properties to be able to link to user properly. - person_ids = self._take_first( - sync_execute( - f""" - SELECT DISTINCT {self.DISTINCT_ID_TABLE_ALIAS}.person_id - FROM events e - {self._distinct_ids_join} - WHERE team_id = %(team_id)s - AND timestamp > %(after)s - AND timestamp < %(before)s - AND {self._filter_clause} - """, - self._params, - ) - ) - - serialized_people = get_serialized_people(self.team, person_ids) - return serialized_people - - def _query_related_groups(self, group_type_index: GroupTypeIndex) -> list[SerializedGroup]: - if group_type_index == self.group_type_index: - return [] - - group_ids = self._take_first( - sync_execute( - f""" - SELECT DISTINCT $group_{group_type_index} AS group_key - FROM events e - {'' if self.is_aggregating_by_groups else self._distinct_ids_join} - JOIN ( - SELECT group_key - FROM groups - WHERE team_id = %(team_id)s AND group_type_index = %(group_type_index)s - GROUP BY group_key - ) groups ON $group_{group_type_index} = groups.group_key - WHERE team_id = %(team_id)s - AND timestamp > %(after)s - AND timestamp < %(before)s - AND group_key != '' - AND {self._filter_clause} - ORDER BY group_key - """, - {**self._params, "group_type_index": group_type_index}, - ) - ) - - _, serialize_groups = get_groups(self.team.pk, group_type_index, group_ids) - return serialize_groups - - def _take_first(self, rows: list) -> list: - return [row[0] for row in rows] - - @property - def _filter_clause(self): - if self.is_aggregating_by_groups: - return f"$group_{self.group_type_index} = %(id)s" - else: - return f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id = %(id)s" - - @property - def _distinct_ids_join(self): - return f"JOIN ({get_team_distinct_ids_query(self.team.pk)}) {self.DISTINCT_ID_TABLE_ALIAS} on e.distinct_id = {self.DISTINCT_ID_TABLE_ALIAS}.distinct_id" - - @cached_property - def _params(self): - return { - "team_id": self.team.pk, - "id": self.id, - "after": (now() - timedelta(days=90)).strftime("%Y-%m-%dT%H:%M:%S.%f"), - "before": now().strftime("%Y-%m-%dT%H:%M:%S.%f"), - } diff --git a/ee/clickhouse/queries/retention/__init__.py b/ee/clickhouse/queries/retention/__init__.py deleted file mode 100644 index dcdcf4349a..0000000000 --- a/ee/clickhouse/queries/retention/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .retention import * diff --git a/ee/clickhouse/queries/stickiness/__init__.py b/ee/clickhouse/queries/stickiness/__init__.py deleted file mode 100644 index 516cae3fe8..0000000000 --- a/ee/clickhouse/queries/stickiness/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .stickiness import * diff --git 
a/ee/clickhouse/queries/stickiness/stickiness.py b/ee/clickhouse/queries/stickiness/stickiness.py deleted file mode 100644 index 65f48c57e3..0000000000 --- a/ee/clickhouse/queries/stickiness/stickiness.py +++ /dev/null @@ -1,12 +0,0 @@ -from ee.clickhouse.queries.stickiness.stickiness_actors import ( - ClickhouseStickinessActors, -) -from ee.clickhouse.queries.stickiness.stickiness_event_query import ( - ClickhouseStickinessEventsQuery, -) -from posthog.queries.stickiness.stickiness import Stickiness - - -class ClickhouseStickiness(Stickiness): - event_query_class = ClickhouseStickinessEventsQuery - actor_query_class = ClickhouseStickinessActors diff --git a/ee/clickhouse/queries/stickiness/stickiness_actors.py b/ee/clickhouse/queries/stickiness/stickiness_actors.py deleted file mode 100644 index 0405aa8674..0000000000 --- a/ee/clickhouse/queries/stickiness/stickiness_actors.py +++ /dev/null @@ -1,15 +0,0 @@ -from ee.clickhouse.queries.stickiness.stickiness_event_query import ( - ClickhouseStickinessEventsQuery, -) -from posthog.models.filters.mixins.utils import cached_property -from posthog.queries.stickiness.stickiness_actors import StickinessActors - - -class ClickhouseStickinessActors(StickinessActors): - event_query_class = ClickhouseStickinessEventsQuery - - @cached_property - def aggregation_group_type_index(self): - if self.entity.math == "unique_group": - return self.entity.math_group_type_index - return None diff --git a/ee/clickhouse/queries/stickiness/stickiness_event_query.py b/ee/clickhouse/queries/stickiness/stickiness_event_query.py deleted file mode 100644 index db15ba05a9..0000000000 --- a/ee/clickhouse/queries/stickiness/stickiness_event_query.py +++ /dev/null @@ -1,11 +0,0 @@ -from posthog.models.group.util import get_aggregation_target_field -from posthog.queries.stickiness.stickiness_event_query import StickinessEventsQuery - - -class ClickhouseStickinessEventsQuery(StickinessEventsQuery): - def aggregation_target(self): - return get_aggregation_target_field( - self._entity.math_group_type_index, - self.EVENT_TABLE_ALIAS, - self._person_id_alias, - ) diff --git a/ee/clickhouse/queries/test/__init__.py b/ee/clickhouse/queries/test/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/clickhouse/queries/test/__snapshots__/test_breakdown_props.ambr b/ee/clickhouse/queries/test/__snapshots__/test_breakdown_props.ambr deleted file mode 100644 index 1dc13e551e..0000000000 --- a/ee/clickhouse/queries/test/__snapshots__/test_breakdown_props.ambr +++ /dev/null @@ -1,252 +0,0 @@ -# serializer version: 1 -# name: TestBreakdownProps.test_breakdown_group_props - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-12 23:59:59', 'UTC') - AND ((isNull(replaceRegexpAll(JSONExtractRaw(group_properties_0, 'out'), '^"|"$', '')) - OR NOT JSONHas(group_properties_0, 'out'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 6 - OFFSET 0 - ''' -# --- -# name: TestBreakdownProps.test_breakdown_group_props.1 - ''' - - SELECT 
replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-12 23:59:59', 'UTC') - AND ((isNull(replaceRegexpAll(JSONExtractRaw(group_properties_0, 'out'), '^"|"$', '')) - OR NOT JSONHas(group_properties_0, 'out'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 6 - OFFSET 0 - ''' -# --- -# name: TestBreakdownProps.test_breakdown_person_props - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(person_props, '$browser'), '^"|"$', '') AS value, - count(*) as count - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id, - argMax(properties, version) as person_props - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON pdi.person_id = person.id - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2019-12-21 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 6 - OFFSET 0 - ''' -# --- -# name: TestBreakdownProps.test_breakdown_person_props_materialized - ''' - - SELECT "pmat_$browser" AS value, - count(*) as count - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id, - argMax(pmat_$browser, version) as pmat_$browser - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON pdi.person_id = person.id - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2019-12-21 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 6 - OFFSET 0 - ''' -# --- -# name: TestBreakdownProps.test_breakdown_person_props_with_entity_filter_and_or_props_with_partial_pushdown - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(person_props, '$browser'), '^"|"$', '') AS value, - count(*) as count - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id, - argMax(properties, version) as person_props - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND ((replaceRegexpAll(JSONExtractRaw(properties, '$browser'), '^"|"$', '') ILIKE '%test%')) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND ((replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$browser'), '^"|"$', '') ILIKE 
'%test%')) SETTINGS optimize_aggregation_in_order = 1) person ON pdi.person_id = person.id - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2019-12-21 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - AND ((has(['test2'], replaceRegexpAll(JSONExtractRaw(person_props, '$os'), '^"|"$', '')) - OR has(['val'], replaceRegexpAll(JSONExtractRaw(e.properties, 'key'), '^"|"$', '')))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 6 - OFFSET 0 - ''' -# --- -# name: TestBreakdownProps.test_breakdown_session_props - ''' - - SELECT sessions.session_duration AS value, - count(*) as count - FROM events e - INNER JOIN - (SELECT "$session_id", - dateDiff('second', min(timestamp), max(timestamp)) as session_duration - FROM events - WHERE "$session_id" != '' - AND team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-02 00:00:00', 'UTC') - INTERVAL 24 HOUR - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-12 23:59:59', 'UTC') + INTERVAL 24 HOUR - GROUP BY "$session_id") AS sessions ON sessions."$session_id" = e."$session_id" - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-02 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-12 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestBreakdownProps.test_breakdown_with_math_property_session - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(person_props, '$browser'), '^"|"$', '') AS value, - sum(session_duration) as count - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id, - argMax(properties, version) as person_props - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON pdi.person_id = person.id - INNER JOIN - (SELECT "$session_id", - dateDiff('second', min(timestamp), max(timestamp)) as session_duration - FROM events - WHERE "$session_id" != '' - AND team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-02 00:00:00', 'UTC') - INTERVAL 24 HOUR - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-12 23:59:59', 'UTC') + INTERVAL 24 HOUR - GROUP BY "$session_id") AS sessions ON sessions."$session_id" = e."$session_id" - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-02 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-12 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestBreakdownProps.test_breakdown_with_math_property_session.1 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(person_props, '$browser'), '^"|"$', '') AS value, - count(*) as count - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id, - argMax(properties, version) as person_props - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON 
pdi.person_id = person.id - INNER JOIN - (SELECT "$session_id", - dateDiff('second', min(timestamp), max(timestamp)) as session_duration - FROM events - WHERE "$session_id" != '' - AND team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-02 00:00:00', 'UTC') - INTERVAL 24 HOUR - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-12 23:59:59', 'UTC') + INTERVAL 24 HOUR - GROUP BY "$session_id") AS sessions ON sessions."$session_id" = e."$session_id" - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-02 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-12 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- diff --git a/ee/clickhouse/queries/test/__snapshots__/test_cohort_query.ambr b/ee/clickhouse/queries/test/__snapshots__/test_cohort_query.ambr deleted file mode 100644 index 5494d3c98c..0000000000 --- a/ee/clickhouse/queries/test/__snapshots__/test_cohort_query.ambr +++ /dev/null @@ -1,762 +0,0 @@ -# serializer version: 1 -# name: TestCohortQuery.test_basic_query - ''' - - SELECT person.person_id AS id - FROM - (SELECT pdi.person_id AS person_id, - countIf(timestamp > now() - INTERVAL 1 day - AND timestamp < now() - AND event = '$pageview' - AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_0_level_0_0, - countIf(timestamp > now() - INTERVAL 2 week - AND timestamp < now() - AND event = '$pageview' - AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_0_level_1_0, - minIf(timestamp, ((replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', '') = 'https://posthog.com/feedback/123' - AND event = '$autocapture'))) >= now() - INTERVAL 2 week - AND minIf(timestamp, ((replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', '') = 'https://posthog.com/feedback/123' - AND event = '$autocapture'))) < now() as first_time_condition_None_level_level_0_level_1_level_0_0 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageview', '$pageview', '$autocapture'] - GROUP BY person_id) behavior_query - INNER JOIN - (SELECT *, - id AS person_id - FROM - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (((has(['test@posthog.com'], replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', ''))))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (((has(['test@posthog.com'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', ''))))) SETTINGS optimize_aggregation_in_order = 1)) person ON person.person_id = behavior_query.person_id - WHERE 1 = 1 - AND ((((performed_event_condition_None_level_level_0_level_0_level_0_0) - OR (performed_event_condition_None_level_level_0_level_0_level_1_0)) - AND ((first_time_condition_None_level_level_0_level_1_level_0_0)))) SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: TestCohortQuery.test_cohort_filter_with_another_cohort_with_event_sequence - ''' - - SELECT person.person_id AS id - FROM - (SELECT person_id, - max(if(event = '$pageview' - AND event_0_latest_0 < event_0_latest_1 - AND event_0_latest_1 <= event_0_latest_0 + INTERVAL 3 day, 2, 1)) = 2 AS steps_0, - 
max(if(event = '$new_view' - AND event_1_latest_0 < event_1_latest_1 - AND event_1_latest_1 <= event_1_latest_0 + INTERVAL 8 day, 2, 1)) = 2 AS steps_1 - FROM - (SELECT person_id, - event, - properties, - distinct_id, timestamp, event_0_latest_0, - min(event_0_latest_1) over (PARTITION by person_id - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) event_0_latest_1, - event_1_latest_0, - min(event_1_latest_1) over (PARTITION by person_id - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) event_1_latest_1 - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, - event, - properties, - distinct_id, timestamp, if(event = '$pageview' - AND timestamp > now() - INTERVAL 8 day, 1, 0) AS event_0_step_0, - if(event_0_step_0 = 1, timestamp, null) AS event_0_latest_0, - if(event = '$pageview' - AND timestamp > now() - INTERVAL 8 day, 1, 0) AS event_0_step_1, - if(event_0_step_1 = 1, timestamp, null) AS event_0_latest_1, - if(event = '$new_view' - AND timestamp > now() - INTERVAL 8 day, 1, 0) AS event_1_step_0, - if(event_1_step_0 = 1, timestamp, null) AS event_1_latest_0, - if(event = '$new_view' - AND timestamp > now() - INTERVAL 8 day, 1, 0) AS event_1_step_1, - if(event_1_step_1 = 1, timestamp, null) AS event_1_latest_1 - FROM events AS e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageview', '$pageview', '$new_view', '$new_view'] - AND timestamp <= now() - AND timestamp >= now() - INTERVAL 8 day )) - GROUP BY person_id) funnel_query - INNER JOIN - (SELECT *, - id AS person_id - FROM - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (((has(['test'], replaceRegexpAll(JSONExtractRaw(properties, 'name'), '^"|"$', ''))))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (((has(['test'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'name'), '^"|"$', ''))))) SETTINGS optimize_aggregation_in_order = 1)) person ON person.person_id = funnel_query.person_id - WHERE 1 = 1 - AND ((((steps_0)) - AND (steps_1))) SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: TestCohortQuery.test_cohort_filter_with_extra - ''' - - SELECT person.person_id AS id - FROM - (SELECT pdi.person_id AS person_id, - countIf(timestamp > now() - INTERVAL 1 week - AND timestamp < now() - AND event = '$pageview' - AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_0_0 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageview'] - AND timestamp <= now() - AND timestamp >= now() - INTERVAL 1 week - GROUP BY person_id) behavior_query - INNER JOIN - (SELECT *, - id AS person_id - FROM - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND ((((has(['test'], replaceRegexpAll(JSONExtractRaw(properties, 'name'), '^"|"$', '')))))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND ((((has(['test'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'name'), 
'^"|"$', '')))))) SETTINGS optimize_aggregation_in_order = 1)) person ON person.person_id = behavior_query.person_id - WHERE 1 = 1 - AND (((performed_event_condition_None_level_level_0_level_0_0))) SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: TestCohortQuery.test_cohort_filter_with_extra.1 - ''' - - SELECT if(behavior_query.person_id = '00000000-0000-0000-0000-000000000000', person.person_id, behavior_query.person_id) AS id - FROM - (SELECT pdi.person_id AS person_id, - countIf(timestamp > now() - INTERVAL 1 week - AND timestamp < now() - AND event = '$pageview' - AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_1_level_0_0 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageview'] - AND timestamp <= now() - AND timestamp >= now() - INTERVAL 1 week - GROUP BY person_id) behavior_query - FULL OUTER JOIN - (SELECT *, - id AS person_id - FROM - (SELECT id, - argMax(properties, version) as person_props - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1)) person ON person.person_id = behavior_query.person_id - WHERE 1 = 1 - AND ((((has(['test'], replaceRegexpAll(JSONExtractRaw(person_props, 'name'), '^"|"$', '')))) - OR ((performed_event_condition_None_level_level_0_level_1_level_0_0)))) SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: TestCohortQuery.test_performed_event_sequence - ''' - - SELECT funnel_query.person_id AS id - FROM - (SELECT person_id, - max(if(event = '$pageview' - AND event_0_latest_0 < event_0_latest_1 - AND event_0_latest_1 <= event_0_latest_0 + INTERVAL 3 day, 2, 1)) = 2 AS steps_0 - FROM - (SELECT person_id, - event, - properties, - distinct_id, timestamp, event_0_latest_0, - min(event_0_latest_1) over (PARTITION by person_id - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) event_0_latest_1 - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, - event, - properties, - distinct_id, timestamp, if(event = '$pageview' - AND timestamp > now() - INTERVAL 7 day, 1, 0) AS event_0_step_0, - if(event_0_step_0 = 1, timestamp, null) AS event_0_latest_0, - if(event = '$pageview' - AND timestamp > now() - INTERVAL 7 day, 1, 0) AS event_0_step_1, - if(event_0_step_1 = 1, timestamp, null) AS event_0_latest_1 - FROM events AS e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageview', '$pageview'] - AND timestamp <= now() - AND timestamp >= now() - INTERVAL 7 day )) - GROUP BY person_id) funnel_query - WHERE 1 = 1 - AND (((steps_0))) SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: TestCohortQuery.test_performed_event_sequence_and_clause_with_additional_event - ''' - - SELECT funnel_query.person_id AS id - FROM - (SELECT person_id, - max(if(event = '$pageview' - AND event_0_latest_0 < event_0_latest_1 - AND event_0_latest_1 <= event_0_latest_0 + INTERVAL 3 day, 2, 1)) = 2 AS steps_0, - countIf(timestamp > now() - INTERVAL 1 week 
- AND timestamp < now() - AND event = '$new_view' - AND 1=1) >= 1 AS performed_event_multiple_condition_None_level_level_0_level_1_0 - FROM - (SELECT person_id, - event, - properties, - distinct_id, timestamp, event_0_latest_0, - min(event_0_latest_1) over (PARTITION by person_id - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) event_0_latest_1 - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, - event, - properties, - distinct_id, timestamp, if(event = '$pageview' - AND timestamp > now() - INTERVAL 7 day, 1, 0) AS event_0_step_0, - if(event_0_step_0 = 1, timestamp, null) AS event_0_latest_0, - if(event = '$pageview' - AND timestamp > now() - INTERVAL 7 day, 1, 0) AS event_0_step_1, - if(event_0_step_1 = 1, timestamp, null) AS event_0_latest_1 - FROM events AS e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$new_view', '$pageview', '$pageview'] - AND timestamp <= now() - AND timestamp >= now() - INTERVAL 1 week )) - GROUP BY person_id) funnel_query - WHERE 1 = 1 - AND (((steps_0) - OR (performed_event_multiple_condition_None_level_level_0_level_1_0))) SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: TestCohortQuery.test_performed_event_sequence_with_person_properties - ''' - - SELECT person.person_id AS id - FROM - (SELECT person_id, - max(if(event = '$pageview' - AND event_0_latest_0 < event_0_latest_1 - AND event_0_latest_1 <= event_0_latest_0 + INTERVAL 3 day, 2, 1)) = 2 AS steps_0, - countIf(timestamp > now() - INTERVAL 1 week - AND timestamp < now() - AND event = '$pageview' - AND 1=1) >= 1 AS performed_event_multiple_condition_None_level_level_0_level_1_0 - FROM - (SELECT person_id, - event, - properties, - distinct_id, timestamp, event_0_latest_0, - min(event_0_latest_1) over (PARTITION by person_id - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) event_0_latest_1 - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS person_id, - event, - properties, - distinct_id, timestamp, if(event = '$pageview' - AND timestamp > now() - INTERVAL 7 day, 1, 0) AS event_0_step_0, - if(event_0_step_0 = 1, timestamp, null) AS event_0_latest_0, - if(event = '$pageview' - AND timestamp > now() - INTERVAL 7 day, 1, 0) AS event_0_step_1, - if(event_0_step_1 = 1, timestamp, null) AS event_0_latest_1 - FROM events AS e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageview', '$pageview', '$pageview'] - AND timestamp <= now() - AND timestamp >= now() - INTERVAL 1 week )) - GROUP BY person_id) funnel_query - INNER JOIN - (SELECT *, - id AS person_id - FROM - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND ((has(['test@posthog.com'], replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '')))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND ((has(['test@posthog.com'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1)) 
person ON person.person_id = funnel_query.person_id - WHERE 1 = 1 - AND (((steps_0) - AND (performed_event_multiple_condition_None_level_level_0_level_1_0))) SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: TestCohortQuery.test_performed_event_with_event_filters_and_explicit_date - ''' - - SELECT behavior_query.person_id AS id - FROM - (SELECT pdi.person_id AS person_id, - countIf(timestamp > 'explicit_timestamp' - AND timestamp < now() - AND event = '$pageview' - AND (has(['something'], replaceRegexpAll(JSONExtractRaw(properties, '$filter_prop'), '^"|"$', '')))) > 0 AS performed_event_condition_None_level_level_0_level_0_0 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageview'] - AND timestamp <= now() - AND timestamp >= now() - INTERVAL 4 day - GROUP BY person_id) behavior_query - WHERE 1 = 1 - AND (((performed_event_condition_None_level_level_0_level_0_0))) SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: TestCohortQuery.test_person - ''' - - SELECT if(behavior_query.person_id = '00000000-0000-0000-0000-000000000000', person.person_id, behavior_query.person_id) AS id - FROM - (SELECT pdi.person_id AS person_id, - countIf(timestamp > now() - INTERVAL 1 week - AND timestamp < now() - AND event = '$pageview' - AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_0_0 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageview'] - AND timestamp <= now() - AND timestamp >= now() - INTERVAL 1 week - GROUP BY person_id) behavior_query - FULL OUTER JOIN - (SELECT *, - id AS person_id - FROM - (SELECT id, - argMax(properties, version) as person_props - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1)) person ON person.person_id = behavior_query.person_id - WHERE 1 = 1 - AND (((performed_event_condition_None_level_level_0_level_0_0) - OR (has(['test@posthog.com'], replaceRegexpAll(JSONExtractRaw(person_props, '$sample_field'), '^"|"$', ''))))) SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: TestCohortQuery.test_person_materialized - ''' - - SELECT if(behavior_query.person_id = '00000000-0000-0000-0000-000000000000', person.person_id, behavior_query.person_id) AS id - FROM - (SELECT pdi.person_id AS person_id, - countIf(timestamp > now() - INTERVAL 1 week - AND timestamp < now() - AND event = '$pageview' - AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_0_0 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageview'] - AND timestamp <= now() - AND timestamp >= now() - INTERVAL 1 week - GROUP BY person_id) behavior_query - FULL OUTER JOIN - (SELECT *, - id AS person_id - FROM - (SELECT id, - argMax(pmat_$sample_field, version) as 
pmat_$sample_field - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1)) person ON person.person_id = behavior_query.person_id - WHERE 1 = 1 - AND (((performed_event_condition_None_level_level_0_level_0_0) - OR (has(['test@posthog.com'], "pmat_$sample_field")))) SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: TestCohortQuery.test_person_properties_with_pushdowns - ''' - - SELECT if(behavior_query.person_id = '00000000-0000-0000-0000-000000000000', person.person_id, behavior_query.person_id) AS id - FROM - (SELECT pdi.person_id AS person_id, - countIf(timestamp > now() - INTERVAL 1 day - AND timestamp < now() - AND event = '$pageview' - AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_0_level_0_0, - countIf(timestamp > now() - INTERVAL 2 week - AND timestamp < now() - AND event = '$pageview' - AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_0_level_1_0, - minIf(timestamp, ((replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', '') = 'https://posthog.com/feedback/123' - AND event = '$autocapture'))) >= now() - INTERVAL 2 week - AND minIf(timestamp, ((replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', '') = 'https://posthog.com/feedback/123' - AND event = '$autocapture'))) < now() as first_time_condition_None_level_level_0_level_1_level_0_0 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageview', '$pageview', '$autocapture'] - GROUP BY person_id) behavior_query - FULL OUTER JOIN - (SELECT *, - id AS person_id - FROM - (SELECT id, - argMax(properties, version) as person_props - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (((has(['test@posthog.com'], replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', ''))))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (((has(['test@posthog.com'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', ''))))) SETTINGS optimize_aggregation_in_order = 1)) person ON person.person_id = behavior_query.person_id - WHERE 1 = 1 - AND ((((performed_event_condition_None_level_level_0_level_0_level_0_0) - OR (performed_event_condition_None_level_level_0_level_0_level_1_0) - OR (has(['special'], replaceRegexpAll(JSONExtractRaw(person_props, 'name'), '^"|"$', '')))) - AND ((first_time_condition_None_level_level_0_level_1_level_0_0)))) SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: TestCohortQuery.test_person_props_only - ''' - - SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (((has(['test1@posthog.com'], replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', ''))) - OR (has(['test2@posthog.com'], replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '')))) - OR ((has(['test3'], replaceRegexpAll(JSONExtractRaw(properties, 'name'), '^"|"$', ''))) - AND (has(['test3@posthog.com'], replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', ''))))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (((has(['test1@posthog.com'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), 
'^"|"$', ''))) - OR (has(['test2@posthog.com'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '')))) - OR ((has(['test3'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'name'), '^"|"$', ''))) - AND (has(['test3@posthog.com'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', ''))))) SETTINGS optimize_aggregation_in_order = 1 - ''' -# --- -# name: TestCohortQuery.test_precalculated_cohort_filter_with_extra_filters - ''' - - SELECT count(DISTINCT person_id) - FROM cohortpeople - WHERE team_id = 99999 - AND cohort_id = 99999 - AND version = NULL - ''' -# --- -# name: TestCohortQuery.test_precalculated_cohort_filter_with_extra_filters.1 - ''' - /* cohort_calculation: */ - SELECT count(DISTINCT person_id) - FROM cohortpeople - WHERE team_id = 99999 - AND cohort_id = 99999 - AND version = 0 - ''' -# --- -# name: TestCohortQuery.test_precalculated_cohort_filter_with_extra_filters.2 - ''' - - SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND ((((has(['test'], replaceRegexpAll(JSONExtractRaw(properties, 'name'), '^"|"$', ''))))) - OR (has(['test2'], replaceRegexpAll(JSONExtractRaw(properties, 'name'), '^"|"$', '')))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND ((((has(['test'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'name'), '^"|"$', ''))))) - OR (has(['test2'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'name'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1 - ''' -# --- -# name: TestCohortQuery.test_static_cohort_filter - ''' - - SELECT count(DISTINCT person_id) - FROM person_static_cohort - WHERE team_id = 99999 - AND cohort_id = 99999 - ''' -# --- -# name: TestCohortQuery.test_static_cohort_filter.1 - ''' - - SELECT person.person_id AS id - FROM - (SELECT *, - id AS person_id - FROM - (SELECT id - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1)) person - WHERE 1 = 1 - AND (((id IN - (SELECT person_id as id - FROM person_static_cohort - WHERE cohort_id = 99999 - AND team_id = 99999)))) SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: TestCohortQuery.test_static_cohort_filter_with_extra - ''' - - SELECT count(DISTINCT person_id) - FROM person_static_cohort - WHERE team_id = 99999 - AND cohort_id = 99999 - ''' -# --- -# name: TestCohortQuery.test_static_cohort_filter_with_extra.1 - ''' - - SELECT if(behavior_query.person_id = '00000000-0000-0000-0000-000000000000', person.person_id, behavior_query.person_id) AS id - FROM - (SELECT pdi.person_id AS person_id, - countIf(timestamp > now() - INTERVAL 1 week - AND timestamp < now() - AND event = '$pageview' - AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_1_0 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageview'] - AND timestamp <= now() - AND timestamp >= now() - INTERVAL 1 week - GROUP BY person_id) behavior_query - FULL OUTER JOIN - (SELECT *, - id AS person_id - FROM - (SELECT id - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1)) person ON 
person.person_id = behavior_query.person_id - WHERE 1 = 1 - AND (((id IN - (SELECT person_id as id - FROM person_static_cohort - WHERE cohort_id = 99999 - AND team_id = 99999)) - AND (performed_event_condition_None_level_level_0_level_1_0))) SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: TestCohortQuery.test_static_cohort_filter_with_extra.2 - ''' - - SELECT if(behavior_query.person_id = '00000000-0000-0000-0000-000000000000', person.person_id, behavior_query.person_id) AS id - FROM - (SELECT pdi.person_id AS person_id, - countIf(timestamp > now() - INTERVAL 1 week - AND timestamp < now() - AND event = '$pageview' - AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_1_level_0_0 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageview'] - AND timestamp <= now() - AND timestamp >= now() - INTERVAL 1 week - GROUP BY person_id) behavior_query - FULL OUTER JOIN - (SELECT *, - id AS person_id - FROM - (SELECT id - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1)) person ON person.person_id = behavior_query.person_id - WHERE 1 = 1 - AND ((((id IN - (SELECT person_id as id - FROM person_static_cohort - WHERE cohort_id = 99999 - AND team_id = 99999))) - OR ((performed_event_condition_None_level_level_0_level_1_level_0_0)))) SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: TestCohortQuery.test_unwrapping_static_cohort_filter_hidden_in_layers_of_cohorts - ''' - - SELECT count(DISTINCT person_id) - FROM person_static_cohort - WHERE team_id = 99999 - AND cohort_id = 99999 - ''' -# --- -# name: TestCohortQuery.test_unwrapping_static_cohort_filter_hidden_in_layers_of_cohorts.1 - ''' - - SELECT if(behavior_query.person_id = '00000000-0000-0000-0000-000000000000', person.person_id, behavior_query.person_id) AS id - FROM - (SELECT pdi.person_id AS person_id, - countIf(timestamp > now() - INTERVAL 7 day - AND timestamp < now() - AND event = '$new_view' - AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_0_level_0_0, - countIf(timestamp > now() - INTERVAL 1 week - AND timestamp < now() - AND event = '$pageview' - AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_1_0 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$new_view', '$pageview'] - AND timestamp <= now() - AND timestamp >= now() - INTERVAL 7 day - GROUP BY person_id) behavior_query - FULL OUTER JOIN - (SELECT *, - id AS person_id - FROM - (SELECT id - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1)) person ON person.person_id = behavior_query.person_id - WHERE 1 = 1 - AND ((((performed_event_condition_None_level_level_0_level_0_level_0_0) - AND (id NOT IN - (SELECT person_id as id - FROM person_static_cohort - WHERE cohort_id = 99999 - AND team_id = 99999))) - OR (performed_event_condition_None_level_level_0_level_1_0))) SETTINGS optimize_aggregation_in_order = 1, - 
join_algorithm = 'auto' - ''' -# --- diff --git a/ee/clickhouse/queries/test/__snapshots__/test_event_query.ambr b/ee/clickhouse/queries/test/__snapshots__/test_event_query.ambr deleted file mode 100644 index 381db5c625..0000000000 --- a/ee/clickhouse/queries/test/__snapshots__/test_event_query.ambr +++ /dev/null @@ -1,386 +0,0 @@ -# serializer version: 1 -# name: TestEventQuery.test_account_filters - ''' - - SELECT count(DISTINCT person_id) - FROM cohortpeople - WHERE team_id = 99999 - AND cohort_id = 99999 - AND version = NULL - ''' -# --- -# name: TestEventQuery.test_account_filters.1 - ''' - /* cohort_calculation: */ - SELECT count(DISTINCT person_id) - FROM cohortpeople - WHERE team_id = 99999 - AND cohort_id = 99999 - AND version = 0 - ''' -# --- -# name: TestEventQuery.test_account_filters.2 - ''' - SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = 'event_name' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2021-01-14 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-21 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (has(['Jane'], replaceRegexpAll(JSONExtractRaw(properties, 'name'), '^"|"$', ''))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (has(['Jane'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'name'), '^"|"$', ''))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event = 'event_name' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2021-01-14 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-21 23:59:59', 'UTC') - ''' -# --- -# name: TestEventQuery.test_basic_event_filter - ''' - SELECT e.timestamp as timestamp - FROM events e - WHERE team_id = 99999 - AND event = 'viewed' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2021-05-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') - ''' -# --- -# name: TestEventQuery.test_cohort_filter - ''' - SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = 'viewed' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2021-05-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event = 'viewed' - AND toTimeZone(timestamp, 'UTC') >= 
toDateTime(toStartOfDay(toDateTime('2021-05-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') - AND (if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND ((has(['test'], replaceRegexpAll(JSONExtractRaw(properties, 'name'), '^"|"$', '')))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND ((has(['test'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'name'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1)) - ''' -# --- -# name: TestEventQuery.test_denormalised_props - ''' - SELECT e.timestamp as timestamp, - e."mat_test_prop" as "mat_test_prop" - FROM events e - WHERE team_id = 99999 - AND event = 'user signed up' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND ((has(['hi'], "mat_test_prop")) - AND (has(['hi'], "mat_test_prop"))) - ''' -# --- -# name: TestEventQuery.test_element - ''' - SELECT e.timestamp as timestamp - FROM events e - WHERE team_id = 99999 - AND event = 'event_name' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2021-01-14 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-21 23:59:59', 'UTC') - AND ((match(elements_chain, '(^|;)label(\\.|$|;|:)'))) - ''' -# --- -# name: TestEventQuery.test_element.1 - ''' - SELECT e.timestamp as timestamp - FROM events e - WHERE team_id = 99999 - AND event = 'event_name' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2021-01-14 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-01-21 23:59:59', 'UTC') - AND (0 = 191) - ''' -# --- -# name: TestEventQuery.test_entity_filtered_by_cohort - ''' - SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2021-05-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2021-05-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') - AND (if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND ((has(['test'], replaceRegexpAll(JSONExtractRaw(properties, 'name'), '^"|"$', '')))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND ((has(['test'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'name'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1)) - ''' -# --- -# name: 
TestEventQuery.test_entity_filtered_by_multiple_session_duration_filters - ''' - SELECT e.timestamp as timestamp, - sessions.session_duration as session_duration, - sessions.$session_id as $session_id - FROM events e - INNER JOIN - (SELECT "$session_id", - dateDiff('second', min(timestamp), max(timestamp)) as session_duration - FROM events - WHERE "$session_id" != '' - AND team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-02 00:00:00', 'UTC') - INTERVAL 24 HOUR - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-03 23:59:59', 'UTC') + INTERVAL 24 HOUR - GROUP BY "$session_id") as sessions ON sessions."$session_id" = e."$session_id" - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-02 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-03 23:59:59', 'UTC') - AND (sessions.session_duration > 90.0 - AND sessions.session_duration < 150.0) - ''' -# --- -# name: TestEventQuery.test_entity_filtered_by_session_duration - ''' - SELECT e.timestamp as timestamp, - sessions.session_duration as session_duration, - sessions.$session_id as $session_id - FROM events e - INNER JOIN - (SELECT "$session_id", - dateDiff('second', min(timestamp), max(timestamp)) as session_duration - FROM events - WHERE "$session_id" != '' - AND team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-02 00:00:00', 'UTC') - INTERVAL 24 HOUR - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-03 23:59:59', 'UTC') + INTERVAL 24 HOUR - GROUP BY "$session_id") as sessions ON sessions."$session_id" = e."$session_id" - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-02 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-03 23:59:59', 'UTC') - AND (sessions.session_duration > 90.0) - ''' -# --- -# name: TestEventQuery.test_event_properties_filter - ''' - SELECT e.timestamp as timestamp, - e."properties" as "properties" - FROM events e - WHERE team_id = 99999 - AND event = 'viewed' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2021-05-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') - AND (has(['test_val'], replaceRegexpAll(JSONExtractRaw(e.properties, 'some_key'), '^"|"$', ''))) - ''' -# --- -# name: TestEventQuery.test_event_properties_filter.1 - ''' - SELECT e.timestamp as timestamp - FROM events e - WHERE team_id = 99999 - AND event = 'viewed' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2021-05-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') - AND (has(['test_val'], replaceRegexpAll(JSONExtractRaw(e.properties, 'some_key'), '^"|"$', ''))) - ''' -# --- -# name: TestEventQuery.test_groups_filters - ''' - SELECT e.timestamp as timestamp - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_1 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 1 - GROUP BY group_key) groups_1 ON "$group_1" == groups_1.group_key - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 
'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-12 23:59:59', 'UTC') - AND ((has(['finance'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND (has(['value'], replaceRegexpAll(JSONExtractRaw(group_properties_1, 'another'), '^"|"$', '')))) - ''' -# --- -# name: TestEventQuery.test_groups_filters_mixed - ''' - SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-12 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND ((has(['test'], replaceRegexpAll(JSONExtractRaw(properties, '$browser'), '^"|"$', '')))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND ((has(['test'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$browser'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-12 23:59:59', 'UTC') - AND ((has(['finance'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '')))) - ''' -# --- -# name: TestEventQuery.test_static_cohort_filter - ''' - SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = 'viewed' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2021-05-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event = 'viewed' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2021-05-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') - AND (if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) IN - (SELECT person_id as id - FROM person_static_cohort - WHERE cohort_id = 99999 - AND team_id = 99999)) - ''' -# --- -# name: TestEventQuery.test_unique_session_math_filtered_by_session_duration - ''' - SELECT e.timestamp as timestamp, - e."$session_id" as 
"$session_id", - sessions.session_duration as session_duration - FROM events e - INNER JOIN - (SELECT "$session_id", - dateDiff('second', min(timestamp), max(timestamp)) as session_duration - FROM events - WHERE "$session_id" != '' - AND team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-02 00:00:00', 'UTC') - INTERVAL 24 HOUR - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-03 23:59:59', 'UTC') + INTERVAL 24 HOUR - GROUP BY "$session_id") as sessions ON sessions."$session_id" = e."$session_id" - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-02 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-03 23:59:59', 'UTC') - AND (sessions.session_duration > 30.0) - ''' -# --- diff --git a/ee/clickhouse/queries/test/__snapshots__/test_groups_join_query.ambr b/ee/clickhouse/queries/test/__snapshots__/test_groups_join_query.ambr deleted file mode 100644 index 85b77e6162..0000000000 --- a/ee/clickhouse/queries/test/__snapshots__/test_groups_join_query.ambr +++ /dev/null @@ -1,55 +0,0 @@ -# serializer version: 1 -# name: test_groups_join_query_filtering - tuple( - ''' - - LEFT JOIN ( - SELECT - group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = %(team_id)s AND group_type_index = %(group_index_0)s - GROUP BY group_key - ) groups_0 - ON "$group_0" == groups_0.group_key - - ''', - dict({ - 'group_index_0': 0, - 'team_id': 2, - }), - ) -# --- -# name: test_groups_join_query_filtering_with_custom_key_names - tuple( - ''' - - LEFT JOIN ( - SELECT - group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = %(team_id)s AND group_type_index = %(group_index_0)s - GROUP BY group_key - ) groups_0 - ON call_me_industry == groups_0.group_key - - - LEFT JOIN ( - SELECT - group_key, - argMax(group_properties, _timestamp) AS group_properties_2 - FROM groups - WHERE team_id = %(team_id)s AND group_type_index = %(group_index_2)s - GROUP BY group_key - ) groups_2 - ON call_me_industry == groups_2.group_key - - ''', - dict({ - 'group_index_0': 0, - 'group_index_2': 2, - 'team_id': 2, - }), - ) -# --- diff --git a/ee/clickhouse/queries/test/__snapshots__/test_lifecycle.ambr b/ee/clickhouse/queries/test/__snapshots__/test_lifecycle.ambr deleted file mode 100644 index ff52a4b089..0000000000 --- a/ee/clickhouse/queries/test/__snapshots__/test_lifecycle.ambr +++ /dev/null @@ -1,672 +0,0 @@ -# serializer version: 1 -# name: TestClickhouseLifecycle.test_interval_dates_days - ''' - WITH 'day' AS selected_period, - periods AS - (SELECT dateSub(day, number, dateTrunc(selected_period, toDateTime('2021-05-05 23:59:59', 'UTC'))) AS start_of_period - FROM numbers(dateDiff('day', dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC')), dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC') + INTERVAL 1 day)))) - SELECT groupArray(start_of_period) AS date, - groupArray(counts) AS total, - status - FROM - (SELECT if(status = 'dormant', toInt64(SUM(counts)) * toInt16(-1), toInt64(SUM(counts))) as counts, - start_of_period, - status - FROM - (SELECT periods.start_of_period as start_of_period, - toUInt16(0) AS counts, - status - FROM periods - CROSS JOIN - (SELECT status - FROM - (SELECT ['new', 'returning', 'resurrecting', 'dormant'] as status) ARRAY - JOIN status) as sec - ORDER BY status, - start_of_period - UNION ALL SELECT start_of_period, - count(DISTINCT person_id) counts, - status - FROM - (SELECT 
if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - arraySort(groupUniqArray(dateTrunc('day', toTimeZone(toDateTime(events.timestamp, 'UTC'), 'UTC')))) AS all_activity, - arrayPopBack(arrayPushFront(all_activity, dateTrunc('day', toTimeZone(toDateTime(min(person.created_at), 'UTC'), 'UTC')))) as previous_activity, - arrayPopFront(arrayPushBack(all_activity, dateTrunc('day', toDateTime('1970-01-01')))) as following_activity, - arrayMap((previous, current, index) -> if(previous = current, 'new', if(current - INTERVAL 1 day = previous - AND index != 1, 'returning', 'resurrecting')), previous_activity, all_activity, arrayEnumerate(all_activity)) as initial_status, - arrayMap((current, next) -> if(current + INTERVAL 1 day = next, '', 'dormant'), all_activity, following_activity) as dormant_status, - arrayMap(x -> x + INTERVAL 1 day, arrayFilter((current, is_dormant) -> is_dormant = 'dormant', all_activity, dormant_status)) as dormant_periods, - arrayMap(x -> 'dormant', dormant_periods) as dormant_label, - arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) as temp_concat, - arrayJoin(temp_concat) as period_status_pairs, - period_status_pairs.1 as start_of_period, - period_status_pairs.2 as status, - toDateTime(min(person.created_at), 'UTC') AS created_at - FROM events AS e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id, - argMax(created_at, version) as created_at - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event = '$pageview' - AND timestamp >= toDateTime(dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC'))) - INTERVAL 1 day - AND timestamp < toDateTime(dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC'))) + INTERVAL 1 day - GROUP BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id)) - GROUP BY start_of_period, - status) - WHERE start_of_period <= dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC')) - AND start_of_period >= dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC')) - GROUP BY start_of_period, - status - ORDER BY start_of_period ASC) - GROUP BY status - ''' -# --- -# name: TestClickhouseLifecycle.test_interval_dates_months - ''' - WITH 'month' AS selected_period, - periods AS - (SELECT dateSub(month, number, dateTrunc(selected_period, toDateTime('2021-05-05 23:59:59', 'UTC'))) AS start_of_period - FROM numbers(dateDiff('month', dateTrunc('month', toDateTime('2021-02-04 00:00:00', 'UTC')), dateTrunc('month', toDateTime('2021-05-05 23:59:59', 'UTC') + INTERVAL 1 month)))) - SELECT groupArray(start_of_period) AS date, - groupArray(counts) AS total, - status - FROM - (SELECT if(status = 'dormant', toInt64(SUM(counts)) * toInt16(-1), toInt64(SUM(counts))) as counts, - start_of_period, - status - FROM - (SELECT periods.start_of_period as start_of_period, - toUInt16(0) AS counts, - status - FROM periods - CROSS JOIN - (SELECT status - FROM - (SELECT ['new', 'returning', 'resurrecting', 'dormant'] as status) ARRAY - JOIN status) as sec - ORDER BY status, - start_of_period - UNION ALL SELECT start_of_period, - count(DISTINCT person_id) counts, - status - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, 
- arraySort(groupUniqArray(dateTrunc('month', toTimeZone(toDateTime(events.timestamp, 'UTC'), 'UTC')))) AS all_activity, - arrayPopBack(arrayPushFront(all_activity, dateTrunc('month', toTimeZone(toDateTime(min(person.created_at), 'UTC'), 'UTC')))) as previous_activity, - arrayPopFront(arrayPushBack(all_activity, dateTrunc('month', toDateTime('1970-01-01')))) as following_activity, - arrayMap((previous, current, index) -> if(previous = current, 'new', if(current - INTERVAL 1 month = previous - AND index != 1, 'returning', 'resurrecting')), previous_activity, all_activity, arrayEnumerate(all_activity)) as initial_status, - arrayMap((current, next) -> if(current + INTERVAL 1 month = next, '', 'dormant'), all_activity, following_activity) as dormant_status, - arrayMap(x -> x + INTERVAL 1 month, arrayFilter((current, is_dormant) -> is_dormant = 'dormant', all_activity, dormant_status)) as dormant_periods, - arrayMap(x -> 'dormant', dormant_periods) as dormant_label, - arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) as temp_concat, - arrayJoin(temp_concat) as period_status_pairs, - period_status_pairs.1 as start_of_period, - period_status_pairs.2 as status, - toDateTime(min(person.created_at), 'UTC') AS created_at - FROM events AS e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id, - argMax(created_at, version) as created_at - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event = '$pageview' - AND timestamp >= toDateTime(dateTrunc('month', toDateTime('2021-02-04 00:00:00', 'UTC'))) - INTERVAL 1 month - AND timestamp < toDateTime(dateTrunc('month', toDateTime('2021-05-05 23:59:59', 'UTC'))) + INTERVAL 1 month - GROUP BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id)) - GROUP BY start_of_period, - status) - WHERE start_of_period <= dateTrunc('month', toDateTime('2021-05-05 23:59:59', 'UTC')) - AND start_of_period >= dateTrunc('month', toDateTime('2021-02-04 00:00:00', 'UTC')) - GROUP BY start_of_period, - status - ORDER BY start_of_period ASC) - GROUP BY status - ''' -# --- -# name: TestClickhouseLifecycle.test_interval_dates_weeks - ''' - WITH 'week' AS selected_period, - periods AS - (SELECT dateSub(week, number, dateTrunc(selected_period, toDateTime('2021-05-06 23:59:59', 'UTC'))) AS start_of_period - FROM numbers(dateDiff('week', dateTrunc('week', toDateTime('2021-04-06 00:00:00', 'UTC')), dateTrunc('week', toDateTime('2021-05-06 23:59:59', 'UTC') + INTERVAL 1 week)))) - SELECT groupArray(start_of_period) AS date, - groupArray(counts) AS total, - status - FROM - (SELECT if(status = 'dormant', toInt64(SUM(counts)) * toInt16(-1), toInt64(SUM(counts))) as counts, - start_of_period, - status - FROM - (SELECT periods.start_of_period as start_of_period, - toUInt16(0) AS counts, - status - FROM periods - CROSS JOIN - (SELECT status - FROM - (SELECT ['new', 'returning', 'resurrecting', 'dormant'] as status) ARRAY - JOIN status) as sec - ORDER BY status, - start_of_period - UNION ALL SELECT start_of_period, - count(DISTINCT person_id) counts, - status - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - arraySort(groupUniqArray(dateTrunc('week', 
toTimeZone(toDateTime(events.timestamp, 'UTC'), 'UTC')))) AS all_activity, - arrayPopBack(arrayPushFront(all_activity, dateTrunc('week', toTimeZone(toDateTime(min(person.created_at), 'UTC'), 'UTC')))) as previous_activity, - arrayPopFront(arrayPushBack(all_activity, dateTrunc('week', toDateTime('1970-01-01')))) as following_activity, - arrayMap((previous, current, index) -> if(previous = current, 'new', if(current - INTERVAL 1 week = previous - AND index != 1, 'returning', 'resurrecting')), previous_activity, all_activity, arrayEnumerate(all_activity)) as initial_status, - arrayMap((current, next) -> if(current + INTERVAL 1 week = next, '', 'dormant'), all_activity, following_activity) as dormant_status, - arrayMap(x -> x + INTERVAL 1 week, arrayFilter((current, is_dormant) -> is_dormant = 'dormant', all_activity, dormant_status)) as dormant_periods, - arrayMap(x -> 'dormant', dormant_periods) as dormant_label, - arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) as temp_concat, - arrayJoin(temp_concat) as period_status_pairs, - period_status_pairs.1 as start_of_period, - period_status_pairs.2 as status, - toDateTime(min(person.created_at), 'UTC') AS created_at - FROM events AS e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id, - argMax(created_at, version) as created_at - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event = '$pageview' - AND timestamp >= toDateTime(dateTrunc('week', toDateTime('2021-04-06 00:00:00', 'UTC'))) - INTERVAL 1 week - AND timestamp < toDateTime(dateTrunc('week', toDateTime('2021-05-06 23:59:59', 'UTC'))) + INTERVAL 1 week - GROUP BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id)) - GROUP BY start_of_period, - status) - WHERE start_of_period <= dateTrunc('week', toDateTime('2021-05-06 23:59:59', 'UTC')) - AND start_of_period >= dateTrunc('week', toDateTime('2021-04-06 00:00:00', 'UTC')) - GROUP BY start_of_period, - status - ORDER BY start_of_period ASC) - GROUP BY status - ''' -# --- -# name: TestClickhouseLifecycle.test_lifecycle_edge_cases - ''' - WITH 'day' AS selected_period, - periods AS - (SELECT dateSub(day, number, dateTrunc(selected_period, toDateTime('2020-01-18 23:59:59', 'UTC'))) AS start_of_period - FROM numbers(dateDiff('day', dateTrunc('day', toDateTime('2020-01-11 00:00:00', 'UTC')), dateTrunc('day', toDateTime('2020-01-18 23:59:59', 'UTC') + INTERVAL 1 day)))) - SELECT groupArray(start_of_period) AS date, - groupArray(counts) AS total, - status - FROM - (SELECT if(status = 'dormant', toInt64(SUM(counts)) * toInt16(-1), toInt64(SUM(counts))) as counts, - start_of_period, - status - FROM - (SELECT periods.start_of_period as start_of_period, - toUInt16(0) AS counts, - status - FROM periods - CROSS JOIN - (SELECT status - FROM - (SELECT ['new', 'returning', 'resurrecting', 'dormant'] as status) ARRAY - JOIN status) as sec - ORDER BY status, - start_of_period - UNION ALL SELECT start_of_period, - count(DISTINCT person_id) counts, - status - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - arraySort(groupUniqArray(dateTrunc('day', toTimeZone(toDateTime(events.timestamp, 'UTC'), 'UTC')))) AS 
all_activity, - arrayPopBack(arrayPushFront(all_activity, dateTrunc('day', toTimeZone(toDateTime(min(person.created_at), 'UTC'), 'UTC')))) as previous_activity, - arrayPopFront(arrayPushBack(all_activity, dateTrunc('day', toDateTime('1970-01-01')))) as following_activity, - arrayMap((previous, current, index) -> if(previous = current, 'new', if(current - INTERVAL 1 day = previous - AND index != 1, 'returning', 'resurrecting')), previous_activity, all_activity, arrayEnumerate(all_activity)) as initial_status, - arrayMap((current, next) -> if(current + INTERVAL 1 day = next, '', 'dormant'), all_activity, following_activity) as dormant_status, - arrayMap(x -> x + INTERVAL 1 day, arrayFilter((current, is_dormant) -> is_dormant = 'dormant', all_activity, dormant_status)) as dormant_periods, - arrayMap(x -> 'dormant', dormant_periods) as dormant_label, - arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) as temp_concat, - arrayJoin(temp_concat) as period_status_pairs, - period_status_pairs.1 as start_of_period, - period_status_pairs.2 as status, - toDateTime(min(person.created_at), 'UTC') AS created_at - FROM events AS e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id, - argMax(created_at, version) as created_at - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event = '$pageview' - AND timestamp >= toDateTime(dateTrunc('day', toDateTime('2020-01-11 00:00:00', 'UTC'))) - INTERVAL 1 day - AND timestamp < toDateTime(dateTrunc('day', toDateTime('2020-01-18 23:59:59', 'UTC'))) + INTERVAL 1 day - GROUP BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id)) - GROUP BY start_of_period, - status) - WHERE start_of_period <= dateTrunc('day', toDateTime('2020-01-18 23:59:59', 'UTC')) - AND start_of_period >= dateTrunc('day', toDateTime('2020-01-11 00:00:00', 'UTC')) - GROUP BY start_of_period, - status - ORDER BY start_of_period ASC) - GROUP BY status - ''' -# --- -# name: TestClickhouseLifecycle.test_lifecycle_hogql_event_properties - ''' - WITH 'day' AS selected_period, - periods AS - (SELECT dateSub(day, number, dateTrunc(selected_period, toDateTime('2021-05-05 23:59:59', 'UTC'))) AS start_of_period - FROM numbers(dateDiff('day', dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC')), dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC') + INTERVAL 1 day)))) - SELECT groupArray(start_of_period) AS date, - groupArray(counts) AS total, - status - FROM - (SELECT if(status = 'dormant', toInt64(SUM(counts)) * toInt16(-1), toInt64(SUM(counts))) as counts, - start_of_period, - status - FROM - (SELECT periods.start_of_period as start_of_period, - toUInt16(0) AS counts, - status - FROM periods - CROSS JOIN - (SELECT status - FROM - (SELECT ['new', 'returning', 'resurrecting', 'dormant'] as status) ARRAY - JOIN status) as sec - ORDER BY status, - start_of_period - UNION ALL SELECT start_of_period, - count(DISTINCT person_id) counts, - status - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - arraySort(groupUniqArray(dateTrunc('day', toTimeZone(toDateTime(events.timestamp, 'UTC'), 'UTC')))) AS all_activity, - arrayPopBack(arrayPushFront(all_activity, 
dateTrunc('day', toTimeZone(toDateTime(min(person.created_at), 'UTC'), 'UTC')))) as previous_activity, - arrayPopFront(arrayPushBack(all_activity, dateTrunc('day', toDateTime('1970-01-01')))) as following_activity, - arrayMap((previous, current, index) -> if(previous = current, 'new', if(current - INTERVAL 1 day = previous - AND index != 1, 'returning', 'resurrecting')), previous_activity, all_activity, arrayEnumerate(all_activity)) as initial_status, - arrayMap((current, next) -> if(current + INTERVAL 1 day = next, '', 'dormant'), all_activity, following_activity) as dormant_status, - arrayMap(x -> x + INTERVAL 1 day, arrayFilter((current, is_dormant) -> is_dormant = 'dormant', all_activity, dormant_status)) as dormant_periods, - arrayMap(x -> 'dormant', dormant_periods) as dormant_label, - arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) as temp_concat, - arrayJoin(temp_concat) as period_status_pairs, - period_status_pairs.1 as start_of_period, - period_status_pairs.2 as status, - toDateTime(min(person.created_at), 'UTC') AS created_at - FROM events AS e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id, - argMax(created_at, version) as created_at - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event = '$pageview' - AND timestamp >= toDateTime(dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC'))) - INTERVAL 1 day - AND timestamp < toDateTime(dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC'))) + INTERVAL 1 day - AND (and(ifNull(like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), '%example%'), 0), 1)) - GROUP BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id)) - GROUP BY start_of_period, - status) - WHERE start_of_period <= dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC')) - AND start_of_period >= dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC')) - GROUP BY start_of_period, - status - ORDER BY start_of_period ASC) - GROUP BY status - ''' -# --- -# name: TestClickhouseLifecycle.test_lifecycle_hogql_event_properties_materialized - ''' - WITH 'day' AS selected_period, - periods AS - (SELECT dateSub(day, number, dateTrunc(selected_period, toDateTime('2021-05-05 23:59:59', 'UTC'))) AS start_of_period - FROM numbers(dateDiff('day', dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC')), dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC') + INTERVAL 1 day)))) - SELECT groupArray(start_of_period) AS date, - groupArray(counts) AS total, - status - FROM - (SELECT if(status = 'dormant', toInt64(SUM(counts)) * toInt16(-1), toInt64(SUM(counts))) as counts, - start_of_period, - status - FROM - (SELECT periods.start_of_period as start_of_period, - toUInt16(0) AS counts, - status - FROM periods - CROSS JOIN - (SELECT status - FROM - (SELECT ['new', 'returning', 'resurrecting', 'dormant'] as status) ARRAY - JOIN status) as sec - ORDER BY status, - start_of_period - UNION ALL SELECT start_of_period, - count(DISTINCT person_id) counts, - status - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - arraySort(groupUniqArray(dateTrunc('day', 
toTimeZone(toDateTime(events.timestamp, 'UTC'), 'UTC')))) AS all_activity, - arrayPopBack(arrayPushFront(all_activity, dateTrunc('day', toTimeZone(toDateTime(min(person.created_at), 'UTC'), 'UTC')))) as previous_activity, - arrayPopFront(arrayPushBack(all_activity, dateTrunc('day', toDateTime('1970-01-01')))) as following_activity, - arrayMap((previous, current, index) -> if(previous = current, 'new', if(current - INTERVAL 1 day = previous - AND index != 1, 'returning', 'resurrecting')), previous_activity, all_activity, arrayEnumerate(all_activity)) as initial_status, - arrayMap((current, next) -> if(current + INTERVAL 1 day = next, '', 'dormant'), all_activity, following_activity) as dormant_status, - arrayMap(x -> x + INTERVAL 1 day, arrayFilter((current, is_dormant) -> is_dormant = 'dormant', all_activity, dormant_status)) as dormant_periods, - arrayMap(x -> 'dormant', dormant_periods) as dormant_label, - arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) as temp_concat, - arrayJoin(temp_concat) as period_status_pairs, - period_status_pairs.1 as start_of_period, - period_status_pairs.2 as status, - toDateTime(min(person.created_at), 'UTC') AS created_at - FROM events AS e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id, - argMax(created_at, version) as created_at - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event = '$pageview' - AND timestamp >= toDateTime(dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC'))) - INTERVAL 1 day - AND timestamp < toDateTime(dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC'))) + INTERVAL 1 day - AND (and(ifNull(like(nullIf(nullIf(events.`mat_$current_url`, ''), 'null'), '%example%'), 0), 1)) - GROUP BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id)) - GROUP BY start_of_period, - status) - WHERE start_of_period <= dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC')) - AND start_of_period >= dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC')) - GROUP BY start_of_period, - status - ORDER BY start_of_period ASC) - GROUP BY status - ''' -# --- -# name: TestClickhouseLifecycle.test_lifecycle_hogql_person_properties - ''' - WITH 'day' AS selected_period, - periods AS - (SELECT dateSub(day, number, dateTrunc(selected_period, toDateTime('2021-05-05 23:59:59', 'UTC'))) AS start_of_period - FROM numbers(dateDiff('day', dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC')), dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC') + INTERVAL 1 day)))) - SELECT groupArray(start_of_period) AS date, - groupArray(counts) AS total, - status - FROM - (SELECT if(status = 'dormant', toInt64(SUM(counts)) * toInt16(-1), toInt64(SUM(counts))) as counts, - start_of_period, - status - FROM - (SELECT periods.start_of_period as start_of_period, - toUInt16(0) AS counts, - status - FROM periods - CROSS JOIN - (SELECT status - FROM - (SELECT ['new', 'returning', 'resurrecting', 'dormant'] as status) ARRAY - JOIN status) as sec - ORDER BY status, - start_of_period - UNION ALL SELECT start_of_period, - count(DISTINCT person_id) counts, - status - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - 
arraySort(groupUniqArray(dateTrunc('day', toTimeZone(toDateTime(events.timestamp, 'UTC'), 'UTC')))) AS all_activity, - arrayPopBack(arrayPushFront(all_activity, dateTrunc('day', toTimeZone(toDateTime(min(person.created_at), 'UTC'), 'UTC')))) as previous_activity, - arrayPopFront(arrayPushBack(all_activity, dateTrunc('day', toDateTime('1970-01-01')))) as following_activity, - arrayMap((previous, current, index) -> if(previous = current, 'new', if(current - INTERVAL 1 day = previous - AND index != 1, 'returning', 'resurrecting')), previous_activity, all_activity, arrayEnumerate(all_activity)) as initial_status, - arrayMap((current, next) -> if(current + INTERVAL 1 day = next, '', 'dormant'), all_activity, following_activity) as dormant_status, - arrayMap(x -> x + INTERVAL 1 day, arrayFilter((current, is_dormant) -> is_dormant = 'dormant', all_activity, dormant_status)) as dormant_periods, - arrayMap(x -> 'dormant', dormant_periods) as dormant_label, - arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) as temp_concat, - arrayJoin(temp_concat) as period_status_pairs, - period_status_pairs.1 as start_of_period, - period_status_pairs.2 as status, - toDateTime(min(person.created_at), 'UTC') AS created_at - FROM events AS e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id, - argMax(created_at, version) as created_at, - argMax(properties, version) as person_props - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event = '$pageview' - AND timestamp >= toDateTime(dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC'))) - INTERVAL 1 day - AND timestamp < toDateTime(dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC'))) + INTERVAL 1 day - AND (ifNull(like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, 'email'), ''), 'null'), '^"|"$', ''), '%test.com'), 0)) - GROUP BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id)) - GROUP BY start_of_period, - status) - WHERE start_of_period <= dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC')) - AND start_of_period >= dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC')) - GROUP BY start_of_period, - status - ORDER BY start_of_period ASC) - GROUP BY status - ''' -# --- -# name: TestClickhouseLifecycle.test_lifecycle_hogql_person_properties_materialized - ''' - WITH 'day' AS selected_period, - periods AS - (SELECT dateSub(day, number, dateTrunc(selected_period, toDateTime('2021-05-05 23:59:59', 'UTC'))) AS start_of_period - FROM numbers(dateDiff('day', dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC')), dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC') + INTERVAL 1 day)))) - SELECT groupArray(start_of_period) AS date, - groupArray(counts) AS total, - status - FROM - (SELECT if(status = 'dormant', toInt64(SUM(counts)) * toInt16(-1), toInt64(SUM(counts))) as counts, - start_of_period, - status - FROM - (SELECT periods.start_of_period as start_of_period, - toUInt16(0) AS counts, - status - FROM periods - CROSS JOIN - (SELECT status - FROM - (SELECT ['new', 'returning', 'resurrecting', 'dormant'] as status) ARRAY - JOIN status) as sec - ORDER BY status, - start_of_period - UNION ALL SELECT start_of_period, - 
count(DISTINCT person_id) counts, - status - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - arraySort(groupUniqArray(dateTrunc('day', toTimeZone(toDateTime(events.timestamp, 'UTC'), 'UTC')))) AS all_activity, - arrayPopBack(arrayPushFront(all_activity, dateTrunc('day', toTimeZone(toDateTime(min(person.created_at), 'UTC'), 'UTC')))) as previous_activity, - arrayPopFront(arrayPushBack(all_activity, dateTrunc('day', toDateTime('1970-01-01')))) as following_activity, - arrayMap((previous, current, index) -> if(previous = current, 'new', if(current - INTERVAL 1 day = previous - AND index != 1, 'returning', 'resurrecting')), previous_activity, all_activity, arrayEnumerate(all_activity)) as initial_status, - arrayMap((current, next) -> if(current + INTERVAL 1 day = next, '', 'dormant'), all_activity, following_activity) as dormant_status, - arrayMap(x -> x + INTERVAL 1 day, arrayFilter((current, is_dormant) -> is_dormant = 'dormant', all_activity, dormant_status)) as dormant_periods, - arrayMap(x -> 'dormant', dormant_periods) as dormant_label, - arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) as temp_concat, - arrayJoin(temp_concat) as period_status_pairs, - period_status_pairs.1 as start_of_period, - period_status_pairs.2 as status, - toDateTime(min(person.created_at), 'UTC') AS created_at - FROM events AS e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id, - argMax(created_at, version) as created_at, - argMax(pmat_email, version) as pmat_email - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event = '$pageview' - AND timestamp >= toDateTime(dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC'))) - INTERVAL 1 day - AND timestamp < toDateTime(dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC'))) + INTERVAL 1 day - AND (ifNull(like(nullIf(nullIf(pmat_email, ''), 'null'), '%test.com'), 0)) - GROUP BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id)) - GROUP BY start_of_period, - status) - WHERE start_of_period <= dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC')) - AND start_of_period >= dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC')) - GROUP BY start_of_period, - status - ORDER BY start_of_period ASC) - GROUP BY status - ''' -# --- -# name: TestClickhouseLifecycle.test_test_account_filters_with_groups - ''' - WITH 'day' AS selected_period, - periods AS - (SELECT dateSub(day, number, dateTrunc(selected_period, toDateTime('2020-01-19 23:59:59', 'UTC'))) AS start_of_period - FROM numbers(dateDiff('day', dateTrunc('day', toDateTime('2020-01-12 00:00:00', 'UTC')), dateTrunc('day', toDateTime('2020-01-19 23:59:59', 'UTC') + INTERVAL 1 day)))) - SELECT groupArray(start_of_period) AS date, - groupArray(counts) AS total, - status - FROM - (SELECT if(status = 'dormant', toInt64(SUM(counts)) * toInt16(-1), toInt64(SUM(counts))) as counts, - start_of_period, - status - FROM - (SELECT periods.start_of_period as start_of_period, - toUInt16(0) AS counts, - status - FROM periods - CROSS JOIN - (SELECT status - FROM - (SELECT ['new', 'returning', 'resurrecting', 'dormant'] as status) ARRAY - JOIN status) as sec - ORDER BY status, - 
start_of_period - UNION ALL SELECT start_of_period, - count(DISTINCT person_id) counts, - status - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - arraySort(groupUniqArray(dateTrunc('day', toTimeZone(toDateTime(events.timestamp, 'UTC'), 'UTC')))) AS all_activity, - arrayPopBack(arrayPushFront(all_activity, dateTrunc('day', toTimeZone(toDateTime(min(person.created_at), 'UTC'), 'UTC')))) as previous_activity, - arrayPopFront(arrayPushBack(all_activity, dateTrunc('day', toDateTime('1970-01-01')))) as following_activity, - arrayMap((previous, current, index) -> if(previous = current, 'new', if(current - INTERVAL 1 day = previous - AND index != 1, 'returning', 'resurrecting')), previous_activity, all_activity, arrayEnumerate(all_activity)) as initial_status, - arrayMap((current, next) -> if(current + INTERVAL 1 day = next, '', 'dormant'), all_activity, following_activity) as dormant_status, - arrayMap(x -> x + INTERVAL 1 day, arrayFilter((current, is_dormant) -> is_dormant = 'dormant', all_activity, dormant_status)) as dormant_periods, - arrayMap(x -> 'dormant', dormant_periods) as dormant_label, - arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) as temp_concat, - arrayJoin(temp_concat) as period_status_pairs, - period_status_pairs.1 as start_of_period, - period_status_pairs.2 as status, - toDateTime(min(person.created_at), 'UTC') AS created_at - FROM events AS e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id, - argMax(created_at, version) as created_at - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event = '$pageview' - AND timestamp >= toDateTime(dateTrunc('day', toDateTime('2020-01-12 00:00:00', 'UTC'))) - INTERVAL 1 day - AND timestamp < toDateTime(dateTrunc('day', toDateTime('2020-01-19 23:59:59', 'UTC'))) + INTERVAL 1 day - AND (has(['value'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'key'), '^"|"$', ''))) - GROUP BY if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id)) - GROUP BY start_of_period, - status) - WHERE start_of_period <= dateTrunc('day', toDateTime('2020-01-19 23:59:59', 'UTC')) - AND start_of_period >= dateTrunc('day', toDateTime('2020-01-12 00:00:00', 'UTC')) - GROUP BY start_of_period, - status - ORDER BY start_of_period ASC) - GROUP BY status - ''' -# --- diff --git a/ee/clickhouse/queries/test/__snapshots__/test_person_distinct_id_query.ambr b/ee/clickhouse/queries/test/__snapshots__/test_person_distinct_id_query.ambr deleted file mode 100644 index 112bddef4e..0000000000 --- a/ee/clickhouse/queries/test/__snapshots__/test_person_distinct_id_query.ambr +++ /dev/null @@ -1,13 +0,0 @@ -# serializer version: 1 -# name: test_person_distinct_id_query - ''' - - SELECT distinct_id, argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = %(team_id)s - - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0 - - ''' -# --- diff --git 
a/ee/clickhouse/queries/test/__snapshots__/test_person_query.ambr b/ee/clickhouse/queries/test/__snapshots__/test_person_query.ambr deleted file mode 100644 index d281c880e5..0000000000 --- a/ee/clickhouse/queries/test/__snapshots__/test_person_query.ambr +++ /dev/null @@ -1,369 +0,0 @@ -# serializer version: 1 -# name: test_person_query - ''' - - SELECT id - FROM person - - WHERE team_id = %(team_id)s - - - - GROUP BY id - HAVING max(is_deleted) = 0 - - - - - - SETTINGS optimize_aggregation_in_order = 1 - - ''' -# --- -# name: test_person_query.1 - ''' - - SELECT id - FROM person - - WHERE team_id = %(team_id)s - AND id IN ( - SELECT id FROM person - - WHERE team_id = %(team_id)s - AND ( "pmat_email" ILIKE %(vperson_filter_pre__0)s) - - ) - - - - GROUP BY id - HAVING max(is_deleted) = 0 - - AND ( argMax(person."pmat_email", version) ILIKE %(vpersonquery_person_filter_fin__0)s) - - - - SETTINGS optimize_aggregation_in_order = 1 - - ''' -# --- -# name: test_person_query_with_and_and_or_property_groups - ''' - - SELECT id, argMax(properties, version) as person_props - FROM person - - WHERE team_id = %(team_id)s - AND id IN ( - SELECT id FROM person - - WHERE team_id = %(team_id)s - AND (( "pmat_email" ILIKE %(vperson_filter_pre__0_0)s OR replaceRegexpAll(JSONExtractRaw(properties, %(kperson_filter_pre__0_1)s), '^"|"$', '') ILIKE %(vperson_filter_pre__0_1)s)) - - ) - - - - GROUP BY id - HAVING max(is_deleted) = 0 - - AND (( argMax(person."pmat_email", version) ILIKE %(vpersonquery_person_filter_fin__0_0)s OR replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), %(kpersonquery_person_filter_fin__0_1)s), '^"|"$', '') ILIKE %(vpersonquery_person_filter_fin__0_1)s)) - - - - SETTINGS optimize_aggregation_in_order = 1 - - ''' -# --- -# name: test_person_query_with_anded_property_groups - ''' - - SELECT id - FROM person - - WHERE team_id = %(team_id)s - AND id IN ( - SELECT id FROM person - - WHERE team_id = %(team_id)s - AND ( "pmat_email" ILIKE %(vperson_filter_pre__0)s AND has(%(vperson_filter_pre__1)s, replaceRegexpAll(JSONExtractRaw(properties, %(kperson_filter_pre__1)s), '^"|"$', '')) AND has(%(vperson_filter_pre__2)s, replaceRegexpAll(JSONExtractRaw(properties, %(kperson_filter_pre__2)s), '^"|"$', ''))) - - ) - - - - GROUP BY id - HAVING max(is_deleted) = 0 - - AND ( argMax(person."pmat_email", version) ILIKE %(vpersonquery_person_filter_fin__0)s AND has(%(vpersonquery_person_filter_fin__1)s, replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), %(kpersonquery_person_filter_fin__1)s), '^"|"$', '')) AND has(%(vpersonquery_person_filter_fin__2)s, replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), %(kpersonquery_person_filter_fin__2)s), '^"|"$', ''))) - - - - SETTINGS optimize_aggregation_in_order = 1 - - ''' -# --- -# name: test_person_query_with_entity_filters - ''' - - SELECT id, argMax(pmat_email, version) as pmat_email - FROM person - - WHERE team_id = %(team_id)s - - - - GROUP BY id - HAVING max(is_deleted) = 0 - - - - - - SETTINGS optimize_aggregation_in_order = 1 - - ''' -# --- -# name: test_person_query_with_entity_filters.1 - ''' - - SELECT id - FROM person - - WHERE team_id = %(team_id)s - AND id IN ( - SELECT id FROM person - - WHERE team_id = %(team_id)s - AND ( "pmat_email" ILIKE %(vperson_filter_pre__0)s) - - ) - - - - GROUP BY id - HAVING max(is_deleted) = 0 - - AND ( argMax(person."pmat_email", version) ILIKE %(vpersonquery_person_filter_fin__0)s) - - - - SETTINGS optimize_aggregation_in_order = 1 - - ''' -# --- -# name: 
test_person_query_with_entity_filters_and_property_group_filters - ''' - - SELECT id, argMax(pmat_email, version) as pmat_email , argMax(properties, version) as person_props - FROM person - - WHERE team_id = %(team_id)s - AND id IN ( - SELECT id FROM person - - WHERE team_id = %(team_id)s - AND (( "pmat_email" ILIKE %(vperson_filter_pre__0_0)s OR replaceRegexpAll(JSONExtractRaw(properties, %(kperson_filter_pre__0_1)s), '^"|"$', '') ILIKE %(vperson_filter_pre__0_1)s)) - - ) - - - - GROUP BY id - HAVING max(is_deleted) = 0 - - AND (( argMax(person."pmat_email", version) ILIKE %(vpersonquery_person_filter_fin__0_0)s OR replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), %(kpersonquery_person_filter_fin__0_1)s), '^"|"$', '') ILIKE %(vpersonquery_person_filter_fin__0_1)s)) - - - - SETTINGS optimize_aggregation_in_order = 1 - - ''' -# --- -# name: test_person_query_with_entity_filters_and_property_group_filters.1 - ''' - - SELECT id, argMax(properties, version) as person_props - FROM person - - WHERE team_id = %(team_id)s - AND id IN ( - SELECT id FROM person - - WHERE team_id = %(team_id)s - AND ((( "pmat_email" ILIKE %(vperson_filter_pre__0_0_0)s OR replaceRegexpAll(JSONExtractRaw(properties, %(kperson_filter_pre__0_0_1)s), '^"|"$', '') ILIKE %(vperson_filter_pre__0_0_1)s))AND ( "pmat_email" ILIKE %(vperson_filter_pre__1_0)s OR replaceRegexpAll(JSONExtractRaw(properties, %(kperson_filter_pre__1_1)s), '^"|"$', '') ILIKE %(vperson_filter_pre__1_1)s)) - - ) - - - - GROUP BY id - HAVING max(is_deleted) = 0 - - AND ((( argMax(person."pmat_email", version) ILIKE %(vpersonquery_person_filter_fin__0_0_0)s OR replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), %(kpersonquery_person_filter_fin__0_0_1)s), '^"|"$', '') ILIKE %(vpersonquery_person_filter_fin__0_0_1)s))AND ( argMax(person."pmat_email", version) ILIKE %(vpersonquery_person_filter_fin__1_0)s OR replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), %(kpersonquery_person_filter_fin__1_1)s), '^"|"$', '') ILIKE %(vpersonquery_person_filter_fin__1_1)s)) - - - - SETTINGS optimize_aggregation_in_order = 1 - - ''' -# --- -# name: test_person_query_with_extra_fields - ''' - - SELECT id, argMax(pmat_email, version) as pmat_email , argMax(properties, version) as person_props - FROM person - - WHERE team_id = %(team_id)s - AND id IN ( - SELECT id FROM person - - WHERE team_id = %(team_id)s - AND ( "pmat_email" ILIKE %(vperson_filter_pre__0)s) - - ) - - - - GROUP BY id - HAVING max(is_deleted) = 0 - - AND ( argMax(person."pmat_email", version) ILIKE %(vpersonquery_person_filter_fin__0)s) - - - - SETTINGS optimize_aggregation_in_order = 1 - - ''' -# --- -# name: test_person_query_with_extra_requested_fields - ''' - - SELECT id, argMax(properties, version) as person_props - FROM person - - WHERE team_id = %(team_id)s - AND id IN ( - SELECT id FROM person - - WHERE team_id = %(team_id)s - AND ( "pmat_email" ILIKE %(vperson_filter_pre__0)s) - - ) - - - - GROUP BY id - HAVING max(is_deleted) = 0 - - AND ( argMax(person."pmat_email", version) ILIKE %(vpersonquery_person_filter_fin__0)s) - - - - SETTINGS optimize_aggregation_in_order = 1 - - ''' -# --- -# name: test_person_query_with_extra_requested_fields.1 - ''' - - SELECT id, argMax(pmat_email, version) as pmat_email - FROM person - - WHERE team_id = %(team_id)s - AND id IN ( - SELECT id FROM person - - WHERE team_id = %(team_id)s - AND ( "pmat_email" ILIKE %(vperson_filter_pre__0)s) - - ) - - - - GROUP BY id - HAVING max(is_deleted) = 0 - - AND ( 
argMax(person."pmat_email", version) ILIKE %(vpersonquery_person_filter_fin__0)s) - - - - SETTINGS optimize_aggregation_in_order = 1 - - ''' -# --- -# name: test_person_query_with_multiple_cohorts - ''' - - SELECT id - FROM person - - WHERE team_id = %(team_id)s - AND id IN ( - SELECT id FROM person - - WHERE team_id = %(team_id)s - AND ( "pmat_email" ILIKE %(vperson_filter_pre__0)s) - - ) - - AND id in ( - SELECT DISTINCT person_id FROM cohortpeople WHERE team_id = %(team_id)s AND cohort_id = %(_cohort_id_0)s AND version = %(_version_0)s - ) AND id in ( - SELECT DISTINCT person_id FROM cohortpeople WHERE team_id = %(team_id)s AND cohort_id = %(_cohort_id_1)s AND version = %(_version_1)s - ) - - GROUP BY id - HAVING max(is_deleted) = 0 - - AND ( argMax(person."pmat_email", version) ILIKE %(vpersonquery_person_filter_fin__0)s) - - - - SETTINGS optimize_aggregation_in_order = 1 - - ''' -# --- -# name: test_person_query_with_updated_after - ''' - - SELECT id - FROM person - - WHERE team_id = %(team_id)s - - - - GROUP BY id - HAVING max(is_deleted) = 0 - and max(_timestamp) > parseDateTimeBestEffort(%(updated_after)s) - - - - - SETTINGS optimize_aggregation_in_order = 1 - - ''' -# --- -# name: test_person_query_with_updated_after.1 - ''' - - SELECT id - FROM person - - WHERE team_id = %(team_id)s - - - - GROUP BY id - HAVING max(is_deleted) = 0 - and max(_timestamp) > parseDateTimeBestEffort(%(updated_after)s) - - - - - SETTINGS optimize_aggregation_in_order = 1 - - ''' -# --- diff --git a/ee/clickhouse/queries/test/test_breakdown_props.py b/ee/clickhouse/queries/test/test_breakdown_props.py deleted file mode 100644 index 7012586163..0000000000 --- a/ee/clickhouse/queries/test/test_breakdown_props.py +++ /dev/null @@ -1,554 +0,0 @@ -import pytest -from freezegun import freeze_time - -from posthog.models.cohort import Cohort -from posthog.models.entity import Entity -from posthog.models.filters import Filter -from posthog.models.group.util import create_group -from posthog.models.group_type_mapping import GroupTypeMapping -from posthog.queries.breakdown_props import ( - _to_bucketing_expression, - get_breakdown_prop_values, -) -from posthog.queries.trends.util import process_math -from posthog.test.base import ( - APIBaseTest, - ClickhouseTestMixin, - _create_event, - _create_person, - also_test_with_materialized_columns, - snapshot_clickhouse_queries, -) - - -class TestBreakdownProps(ClickhouseTestMixin, APIBaseTest): - @also_test_with_materialized_columns( - event_properties=["$host", "distinct_id"], - person_properties=["$browser", "email"], - ) - @snapshot_clickhouse_queries - def test_breakdown_person_props(self): - _create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"$browser": "test"}) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p1", - timestamp="2020-01-02T12:00:00Z", - properties={"key": "val"}, - ) - - self.team.test_account_filters = [ - { - "key": "email", - "type": "person", - "value": "posthog.com", - "operator": "not_icontains", - }, - { - "key": "$host", - "type": "event", - "value": [ - "127.0.0.1:3000", - "127.0.0.1:5000", - "localhost:5000", - "localhost:8000", - ], - "operator": "is_not", - }, - { - "key": "distinct_id", - "type": "event", - "value": "posthog.com", - "operator": "not_icontains", - }, - ] - self.team.save() - with freeze_time("2020-01-04T13:01:01Z"): - filter = Filter( - data={ - "insight": "FUNNELS", - "properties": [], - "filter_test_accounts": True, - "events": [ - { - "id": "$pageview", - "name": "$pageview", - 
"type": "events", - "order": 0, - } - ], - "actions": [], - "funnel_viz_type": "steps", - "display": "FunnelViz", - "interval": "day", - "breakdown": "$browser", - "breakdown_type": "person", - "breakdown_limit": 5, - "date_from": "-14d", - "funnel_window_days": 14, - } - ) - res = get_breakdown_prop_values( - filter, - Entity({"id": "$pageview", "type": "events"}), - "count(*)", - self.team, - ) - self.assertEqual(res[0], ["test"]) - - def test_breakdown_person_props_with_entity_filter(self): - _create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"$browser": "test"}) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p1", - timestamp="2020-01-02T12:00:00Z", - properties={"key": "val"}, - ) - _create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"$browser": "test2"}) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p2", - timestamp="2020-01-02T12:00:00Z", - properties={"key": "val"}, - ) - - cohort = Cohort.objects.create( - team=self.team, - name="a", - groups=[{"properties": [{"key": "$browser", "value": "test", "type": "person"}]}], - ) - cohort.calculate_people_ch(pending_version=0) - - entity_params = [ - { - "id": "$pageview", - "name": "$pageview", - "type": "events", - "order": 0, - "properties": [{"key": "id", "value": cohort.pk, "type": "cohort"}], - } - ] - with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True): - with freeze_time("2020-01-04T13:01:01Z"): - filter = Filter( - data={ - "insight": "FUNNELS", - "properties": [], - "filter_test_accounts": False, - "events": entity_params, - "actions": [], - "funnel_viz_type": "steps", - "display": "FunnelViz", - "interval": "day", - "breakdown": "$browser", - "breakdown_type": "person", - "breakdown_limit": 5, - "date_from": "-14d", - "funnel_window_days": 14, - } - ) - res = get_breakdown_prop_values(filter, Entity(entity_params[0]), "count(*)", self.team) - self.assertEqual(res[0], ["test"]) - - @snapshot_clickhouse_queries - def test_breakdown_person_props_with_entity_filter_and_or_props_with_partial_pushdown(self): - _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"$browser": "test", "$os": "test"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p1", - timestamp="2020-01-02T12:00:00Z", - properties={"key": "val"}, - ) - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"$browser": "test2", "$os": "test2"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p2", - timestamp="2020-01-02T12:00:00Z", - properties={"key": "val2"}, - ) - _create_person( - team_id=self.team.pk, - distinct_ids=["p3"], - properties={"$browser": "test3", "$os": "test3"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p3", - timestamp="2020-01-02T12:00:00Z", - properties={"key": "val3"}, - ) - - entity_params = [ - { - "id": "$pageview", - "name": "$pageview", - "type": "events", - "order": 0, - "properties": [ - { - "key": "$browser", - "type": "person", - "value": "test", - "operator": "icontains", - } - ], - } - ] - with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True): - with freeze_time("2020-01-04T13:01:01Z"): - filter = Filter( - data={ - "insight": "FUNNELS", - "properties": { - "type": "OR", - "values": [ - { - "key": "$os", - "type": "person", - "value": "test2", - "operator": "exact", - }, - { - "key": "key", - "type": "event", - "value": "val", - "operator": "exact", - }, - ], - }, - "filter_test_accounts": False, - "events": 
entity_params, - "actions": [], - "funnel_viz_type": "steps", - "display": "FunnelViz", - "interval": "day", - "breakdown": "$browser", - "breakdown_type": "person", - "breakdown_limit": 5, - "date_from": "-14d", - "funnel_window_days": 14, - } - ) - res = sorted(get_breakdown_prop_values(filter, Entity(entity_params[0]), "count(*)", self.team)[0]) - self.assertEqual(res, ["test", "test2"]) - - @snapshot_clickhouse_queries - def test_breakdown_group_props(self): - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="organization", group_type_index=0 - ) - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="company", group_type_index=1 - ) - - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:5", - properties={"industry": "finance"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:6", - properties={"industry": "technology"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:7", - properties={"industry": "finance"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:8", - properties={"industry": "another", "out": 1}, - ) - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key="company:10", - properties={"industry": "foobar"}, - ) - # :TRICKY: Test group type overlapping - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key="org:8", - properties={"industry": "foobar"}, - ) - - for org_index in range(5, 9): - _create_event( - event="$pageview", - distinct_id="person1", - team=self.team, - properties={"$group_0": f"org:{org_index}"}, - timestamp="2020-01-02T12:00:00Z", - ) - - filter = Filter( - data={ - "date_from": "2020-01-01T00:00:00Z", - "date_to": "2020-01-12T00:00:00Z", - "breakdown": "industry", - "breakdown_type": "group", - "breakdown_group_type_index": 0, - "breakdown_limit": 5, - "events": [{"id": "$pageview", "type": "events", "order": 0}], - "properties": [ - { - "key": "out", - "value": "", - "type": "group", - "group_type_index": 0, - "operator": "is_not_set", - } - ], - }, - team=self.team, - ) - result = get_breakdown_prop_values(filter, filter.entities[0], "count(*)", self.team) - self.assertEqual(result[0], ["finance", "technology"]) - - filter = Filter( - data={ - "date_from": "2020-01-01T00:00:00Z", - "date_to": "2020-01-12T00:00:00Z", - "breakdown": "industry", - "breakdown_type": "group", - "breakdown_group_type_index": 0, - "breakdown_limit": 5, - "events": [{"id": "$pageview", "type": "events", "order": 0}], - "properties": { - "type": "AND", - "values": [ - { - "key": "out", - "value": "", - "type": "group", - "group_type_index": 0, - "operator": "is_not_set", - } - ], - }, - } - ) - result = get_breakdown_prop_values(filter, filter.entities[0], "count(*)", self.team) - self.assertEqual(result[0], ["finance", "technology"]) - - @snapshot_clickhouse_queries - def test_breakdown_session_props(self): - _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"$browser": "test", "$os": "test"}, - ) - - # 20 second session that starts before the time range - _create_event( - team=self.team, - event="$pageview", - distinct_id="p1", - timestamp="2020-01-01T23:59:50Z", - properties={"$session_id": "1"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p1", - timestamp="2020-01-02T00:00:10Z", - properties={"$session_id": "1"}, - ) - - # 70 second session - _create_event( - 
team=self.team, - event="$pageview", - distinct_id="p1", - timestamp="2020-01-02T12:00:00Z", - properties={"$session_id": "2"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p1", - timestamp="2020-01-02T12:01:10Z", - properties={"$session_id": "2"}, - ) - - filter = Filter( - data={ - "date_from": "2020-01-02T00:00:00Z", - "date_to": "2020-01-12T00:00:00Z", - "breakdown": "$session_duration", - "breakdown_type": "session", - "events": [{"id": "$pageview", "type": "events", "order": 0}], - } - ) - result = get_breakdown_prop_values(filter, filter.entities[0], "count(*)", self.team) - self.assertEqual(result[0], [70, 20]) - - @snapshot_clickhouse_queries - def test_breakdown_with_math_property_session(self): - _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"$browser": "test", "$os": "test"}, - ) - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"$browser": "mac", "$os": "test"}, - ) - - # 20 second session that starts before the time range - _create_event( - team=self.team, - event="$pageview", - distinct_id="p1", - timestamp="2020-01-01T23:59:50Z", - properties={"$session_id": "1"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p1", - timestamp="2020-01-02T00:00:10Z", - properties={"$session_id": "1"}, - ) - - # 70 second session - _create_event( - team=self.team, - event="$pageview", - distinct_id="p1", - timestamp="2020-01-02T12:00:00Z", - properties={"$session_id": "2"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p1", - timestamp="2020-01-02T12:01:10Z", - properties={"$session_id": "2"}, - ) - - # 10 second session for second person with different browser, but more absolute - # events than first person - _create_event( - team=self.team, - event="$pageview", - distinct_id="p2", - timestamp="2020-01-02T12:00:00Z", - properties={"$session_id": "3"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p2", - timestamp="2020-01-02T12:00:01Z", - properties={"$session_id": "3"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p2", - timestamp="2020-01-02T12:00:02Z", - properties={"$session_id": "3"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p2", - timestamp="2020-01-02T12:00:03Z", - properties={"$session_id": "3"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p2", - timestamp="2020-01-02T12:00:04Z", - properties={"$session_id": "3"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p2", - timestamp="2020-01-02T12:00:10Z", - properties={"$session_id": "3"}, - ) - - filter = Filter( - data={ - "date_from": "2020-01-02T00:00:00Z", - "date_to": "2020-01-12T00:00:00Z", - "breakdown": "$browser", - "breakdown_type": "person", - "events": [ - { - "id": "$pageview", - "type": "events", - "order": 0, - "math": "sum", - "math_property": "$session_duration", - } - ], - } - ) - aggregate_operation, _, _ = process_math(filter.entities[0], self.team, filter=filter) - - result = get_breakdown_prop_values(filter, filter.entities[0], aggregate_operation, self.team) - # test should come first, based on aggregate operation, even if absolute count of events for - # mac is higher - self.assertEqual(result[0], ["test", "mac"]) - - result = get_breakdown_prop_values(filter, filter.entities[0], "count(*)", self.team) - self.assertEqual(result[0], ["mac", "test"]) - - -@pytest.mark.parametrize( - "test_input,expected", - [ - (0, 
"arrayCompact(arrayMap(x -> floor(x, 2), quantiles(0,1)(value)))"), - (1, "arrayCompact(arrayMap(x -> floor(x, 2), quantiles(0,1)(value)))"), - ( - 2, - "arrayCompact(arrayMap(x -> floor(x, 2), quantiles(0.00,0.50,1.00)(value)))", - ), - ( - 3, - "arrayCompact(arrayMap(x -> floor(x, 2), quantiles(0.00,0.33,0.67,1.00)(value)))", - ), - ( - 5, - "arrayCompact(arrayMap(x -> floor(x, 2), quantiles(0.00,0.20,0.40,0.60,0.80,1.00)(value)))", - ), - ( - 7, - "arrayCompact(arrayMap(x -> floor(x, 2), quantiles(0.00,0.14,0.29,0.43,0.57,0.71,0.86,1.00)(value)))", - ), - ( - 10, - "arrayCompact(arrayMap(x -> floor(x, 2), quantiles(0.00,0.10,0.20,0.30,0.40,0.50,0.60,0.70,0.80,0.90,1.00)(value)))", - ), - ], -) -def test_bucketing_expression(test_input, expected): - result = _to_bucketing_expression(test_input) - - assert result == expected diff --git a/ee/clickhouse/queries/test/test_cohort_query.py b/ee/clickhouse/queries/test/test_cohort_query.py deleted file mode 100644 index 9d07d9378d..0000000000 --- a/ee/clickhouse/queries/test/test_cohort_query.py +++ /dev/null @@ -1,3273 +0,0 @@ -from datetime import datetime, timedelta - - -from ee.clickhouse.queries.enterprise_cohort_query import check_negation_clause -from posthog.client import sync_execute -from posthog.constants import PropertyOperatorType -from posthog.models.action import Action -from posthog.models.cohort import Cohort -from posthog.models.filters.filter import Filter -from posthog.models.property import Property, PropertyGroup -from posthog.queries.cohort_query import CohortQuery -from posthog.test.base import ( - BaseTest, - ClickhouseTestMixin, - _create_event, - _create_person, - also_test_with_materialized_columns, - flush_persons_and_events, - snapshot_clickhouse_queries, -) - - -def _make_event_sequence( - team, - distinct_id, - interval_days, - period_event_counts, - event="$pageview", - properties=None, -): - if properties is None: - properties = {} - for period_index, event_count in enumerate(period_event_counts): - for i in range(event_count): - _create_event( - team=team, - event=event, - properties=properties, - distinct_id=distinct_id, - timestamp=datetime.now() - timedelta(days=interval_days * period_index, hours=1, minutes=i), - ) - - -def _create_cohort(**kwargs): - team = kwargs.pop("team") - name = kwargs.pop("name") - groups = kwargs.pop("groups") - is_static = kwargs.pop("is_static", False) - cohort = Cohort.objects.create(team=team, name=name, groups=groups, is_static=is_static) - return cohort - - -class TestCohortQuery(ClickhouseTestMixin, BaseTest): - @snapshot_clickhouse_queries - def test_basic_query(self): - action1 = Action.objects.create( - team=self.team, - name="action1", - steps_json=[ - { - "event": "$autocapture", - "url": "https://posthog.com/feedback/123", - "url_matching": "exact", - } - ], - ) - - # satiesfies all conditions - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$autocapture", - properties={"$current_url": "https://posthog.com/feedback/123"}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=2), - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=1), - ) - - # doesn't satisfy action - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - 
event="$autocapture", - properties={"$current_url": "https://posthog.com/feedback/123"}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(weeks=3), - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=1), - ) - - # doesn't satisfy property condition - _create_person( - team_id=self.team.pk, - distinct_ids=["p3"], - properties={"name": "test", "email": "testXX@posthog.com"}, - ) - _create_event( - team=self.team, - event="$autocapture", - properties={"$current_url": "https://posthog.com/feedback/123"}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(days=2), - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(days=1), - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "time_value": 1, - "time_interval": "day", - "value": "performed_event", - "type": "behavioral", - }, - { - "key": "$pageview", - "event_type": "events", - "time_value": 2, - "time_interval": "week", - "value": "performed_event", - "type": "behavioral", - }, - ], - }, - { - "type": "AND", - "values": [ - { - "key": action1.pk, - "event_type": "actions", - "time_value": 2, - "time_interval": "week", - "value": "performed_event_first_time", - "type": "behavioral", - }, - { - "key": "email", - "value": "test@posthog.com", - "type": "person", - }, - ], - }, - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - # Since all props should be pushed down here, there should be no full outer join! 
- self.assertTrue("FULL OUTER JOIN" not in q) - - self.assertEqual([p1.uuid], [r[0] for r in res]) - - def test_performed_event(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=2), - ) - - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=9), - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "explicit_datetime": "-1w", - "value": "performed_event", - "type": "behavioral", - } - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p1.uuid], [r[0] for r in res]) - - @snapshot_clickhouse_queries - def test_performed_event_with_event_filters_and_explicit_date(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={"$filter_prop": "something"}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=2), - ) - - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=2), - ) - _create_event( - team=self.team, - event="$pageview", - properties={"$filter_prop": "something"}, - distinct_id="p2", - # rejected because explicit datetime is set to 3 days ago - timestamp=datetime.now() - timedelta(days=5), - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "explicit_datetime": str( - datetime.now() - timedelta(days=3) - ), # overrides time_value and time_interval - "time_value": 1, - "time_interval": "week", - "value": "performed_event", - "type": "behavioral", - "event_filters": [ - {"key": "$filter_prop", "value": "something", "operator": "exact", "type": "event"} - ], - } - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p1.uuid], [r[0] for r in res]) - - def test_performed_event_multiple(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=2), - ) - - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=4), - ) - - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=9), - ) - flush_persons_and_events() - - filter = 
Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "operator": "gte", - "operator_value": 1, - "time_value": 1, - "time_interval": "week", - "value": "performed_event_multiple", - "type": "behavioral", - } - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p1.uuid], [r[0] for r in res]) - - def test_performed_event_multiple_with_event_filters(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={"$filter_prop": "something"}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=2), - ) - - _create_event( - team=self.team, - event="$pageview", - properties={"$filter_prop": "something"}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=4), - ) - - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=2), - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=4), - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "operator": "gte", - "operator_value": 1, - "time_value": 1, - "time_interval": "week", - "value": "performed_event_multiple", - "type": "behavioral", - "event_filters": [ - {"key": "$filter_prop", "value": "something", "operator": "exact", "type": "event"}, - {"key": "$filter_prop", "value": "some", "operator": "icontains", "type": "event"}, - ], - } - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p1.uuid], [r[0] for r in res]) - - def test_performed_event_lte_1_times(self): - _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - - p2 = _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(hours=9), - ) - - _create_person( - team_id=self.team.pk, - distinct_ids=["p3"], - properties={"name": "test3", "email": "test3@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(hours=9), - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(hours=8), - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "operator": "lte", - "operator_value": 1, - "time_value": 1, - "time_interval": "week", - "value": "performed_event_multiple", - "type": "behavioral", - } - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - 
self.assertEqual({p2.uuid}, {r[0] for r in res}) - - def test_can_handle_many_performed_multiple_filters(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(hours=9), - ) - - p2 = _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(hours=9), - ) - - p3 = _create_person( - team_id=self.team.pk, - distinct_ids=["p3"], - properties={"name": "test3", "email": "test3@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(hours=9), - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(hours=8), - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "OR", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "operator": "eq", - "operator_value": 1, - "time_value": 1, - "time_interval": "week", - "value": "performed_event_multiple", - "type": "behavioral", - }, - { - "key": "$pageview", - "event_type": "events", - "operator": "eq", - "operator_value": 2, - "time_value": 1, - "time_interval": "week", - "value": "performed_event_multiple", - "type": "behavioral", - }, - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual({p1.uuid, p2.uuid, p3.uuid}, {r[0] for r in res}) - - def test_performed_event_zero_times_(self): - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "operator": "eq", - "operator_value": 0, - "time_value": 1, - "time_interval": "week", - "value": "performed_event_multiple", - "type": "behavioral", - } - ], - } - } - ) - with self.assertRaises(ValueError): - CohortQuery(filter=filter, team=self.team).get_query() - - def test_stopped_performing_event(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=10), - ) - - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=3), - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "time_value": 2, - "time_interval": "week", - "seq_time_value": 1, - "seq_time_interval": "week", - "value": "stopped_performing_event", - "type": "behavioral", - } - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p1.uuid], [r[0] for r in res]) - - def test_stopped_performing_event_raises_if_seq_date_later_than_date(self): - filter = Filter( - data={ - 
"properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "time_value": 1, - "time_interval": "day", - "seq_time_value": 2, - "seq_time_interval": "day", - "value": "stopped_performing_event", - "type": "behavioral", - } - ], - } - } - ) - - with self.assertRaises(ValueError): - CohortQuery(filter=filter, team=self.team).get_query() - - def test_restarted_performing_event(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test2", "email": "test2@posthog.com"}, - ) - _create_person( - team_id=self.team.pk, - distinct_ids=["p3"], - properties={"name": "test3", "email": "test3@posthog.com"}, - ) - - # P1 events (proper restarting sequence) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=20), - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=1), - ) - - # P2 events (an event occurs in the middle of the sequence, so the event never "stops") - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=20), - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=5), - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=1), - ) - - # P3 events (the event just started, so it isn't considered a restart) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(days=1), - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "time_value": 1, - "time_interval": "week", - "seq_time_value": 2, - "seq_time_interval": "day", - "value": "restarted_performing_event", - "type": "behavioral", - } - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p1.uuid], [r[0] for r in res]) - - def test_restarted_performing_event_raises_if_seq_date_later_than_date(self): - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "time_value": 1, - "time_interval": "day", - "seq_time_value": 2, - "seq_time_interval": "day", - "value": "restarted_performing_event", - "type": "behavioral", - } - ], - } - } - ) - - with self.assertRaises(ValueError): - CohortQuery(filter=filter, team=self.team).get_query() - - def test_performed_event_first_time(self): - _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - p2 = _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test2", "email": "test2@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=20), - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=4), - ) - _create_event( - 
team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=4), - ) - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "time_value": 1, - "time_interval": "week", - "value": "performed_event_first_time", - "type": "behavioral", - } - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p2.uuid], [r[0] for r in res]) - - def test_performed_event_regularly(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - - _make_event_sequence(self.team, "p1", 3, [1, 1, 1]) - flush_persons_and_events() - # Filter for: - # Regularly completed [$pageview] [at least] [1] times per - # [3][day] period for at least [3] of the last [3] periods - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "operator": "gte", - "operator_value": 1, - "time_interval": "day", - "time_value": 3, - "total_periods": 3, - "min_periods": 3, - "value": "performed_event_regularly", - "type": "behavioral", - } - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p1.uuid], [r[0] for r in res]) - - def test_performed_event_regularly_with_variable_event_counts_in_each_period(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - p2 = _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test2", "email": "test2@posthog.com"}, - ) - # p1 gets variable number of events in each period - _make_event_sequence(self.team, "p1", 3, [0, 1, 2]) - # p2 gets 10 events in each period - _make_event_sequence(self.team, "p2", 3, [1, 2, 2]) - - # Filter for: - # Regularly completed [$pageview] [at least] [2] times per - # [3][day] period for at least [2] of the last [3] periods - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "operator": "gte", - "operator_value": 2, - "time_interval": "day", - "time_value": 3, - "total_periods": 3, - "min_periods": 2, - "value": "performed_event_regularly", - "type": "behavioral", - } - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - self.assertEqual([p2.uuid], [r[0] for r in res]) - flush_persons_and_events() - - # Filter for: - # Regularly completed [$pageview] [at least] [1] times per - # [3][day] period for at least [2] of the last [3] periods - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "operator": "gte", - "operator_value": 1, - "time_interval": "day", - "time_value": 3, - "total_periods": 3, - "min_periods": 2, - "value": "performed_event_regularly", - "type": "behavioral", - } - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - self.assertEqual({p1.uuid, p2.uuid}, {r[0] for r in res}) - - @snapshot_clickhouse_queries - def test_person_props_only(self): - p1 = _create_person( - 
team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test1@posthog.com"}, - ) - p2 = _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test2@posthog.com"}, - ) - p3 = _create_person( - team_id=self.team.pk, - distinct_ids=["p3"], - properties={"name": "test3", "email": "test3@posthog.com"}, - ) - # doesn't match - _create_person( - team_id=self.team.pk, - distinct_ids=["p4"], - properties={"name": "test3", "email": "test4@posthog.com"}, - ) - - filter = Filter( - data={ - "properties": { - "type": "OR", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "email", - "value": "test1@posthog.com", - "type": "person", - }, - { - "key": "email", - "value": "test2@posthog.com", - "type": "person", - }, - ], - }, - { - "type": "AND", - "values": [ - {"key": "name", "value": "test3", "type": "person"}, - { - "key": "email", - "value": "test3@posthog.com", - "type": "person", - }, - ], - }, - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - # Since all props should be pushed down here, there should be no full outer join! - self.assertTrue("FULL OUTER JOIN" not in q) - - self.assertCountEqual([p1.uuid, p2.uuid, p3.uuid], [r[0] for r in res]) - - @snapshot_clickhouse_queries - def test_person_properties_with_pushdowns(self): - action1 = Action.objects.create( - team=self.team, - name="action1", - steps_json=[ - { - "event": "$autocapture", - "url": "https://posthog.com/feedback/123", - "url_matching": "exact", - } - ], - ) - - # satiesfies all conditions - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$autocapture", - properties={"$current_url": "https://posthog.com/feedback/123"}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=2), - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=1), - ) - - # doesn't satisfy action - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$autocapture", - properties={"$current_url": "https://posthog.com/feedback/123"}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(weeks=3), - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=1), - ) - - # satisfies special condition (not pushed down person property in OR group) - p3 = _create_person( - team_id=self.team.pk, - distinct_ids=["p3"], - properties={"name": "special", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$autocapture", - properties={"$current_url": "https://posthog.com/feedback/123"}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(days=2), - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "time_value": 1, - "time_interval": "day", - "value": "performed_event", - "type": "behavioral", - }, - { - "key": "$pageview", - "event_type": "events", - "time_value": 2, - "time_interval": "week", - "value": "performed_event", - "type": "behavioral", - }, - { - "key": "name", - "value": 
"special", - "type": "person", - }, # this is NOT pushed down - ], - }, - { - "type": "AND", - "values": [ - { - "key": action1.pk, - "event_type": "actions", - "time_value": 2, - "time_interval": "week", - "value": "performed_event_first_time", - "type": "behavioral", - }, - { - "key": "email", - "value": "test@posthog.com", - "type": "person", - }, # this is pushed down - ], - }, - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertCountEqual([p1.uuid, p3.uuid], [r[0] for r in res]) - - @also_test_with_materialized_columns(person_properties=["$sample_field"]) - @snapshot_clickhouse_queries - def test_person(self): - # satiesfies all conditions - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "$sample_field": "test@posthog.com"}, - ) - filter = Filter( - data={ - "properties": { - "type": "OR", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "time_value": 1, - "time_interval": "week", - "value": "performed_event", - "type": "behavioral", - }, - { - "key": "$sample_field", - "value": "test@posthog.com", - "type": "person", - }, - ], - } - } - ) - flush_persons_and_events() - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p1.uuid], [r[0] for r in res]) - - def test_earliest_date_clause(self): - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "time_value": 1, - "time_interval": "week", - "value": "performed_event", - "type": "behavioral", - }, - { - "key": "$pageview", - "event_type": "events", - "time_value": 2, - "time_interval": "week", - "value": "performed_event_multiple", - "operator_value": 1, - "type": "behavioral", - }, - { - "key": "$pageview", - "event_type": "events", - "time_value": 4, - "time_interval": "week", - "seq_time_value": 1, - "seq_time_interval": "week", - "value": "stopped_performing_event", - "type": "behavioral", - }, - { - "key": "$pageview", - "event_type": "events", - "time_value": 1, - "time_interval": "week", - "value": "performed_event", - "type": "behavioral", - }, - { - "key": "$pageview", - "event_type": "events", - "operator": "gte", - "operator_value": 2, - "time_interval": "week", - "time_value": 3, - "total_periods": 3, - "min_periods": 2, - "value": "performed_event_regularly", - "type": "behavioral", - }, - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertTrue("timestamp >= now() - INTERVAL 9 week" in (q % params)) - - def test_earliest_date_clause_removed_for_started_at_query(self): - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "time_value": 2, - "time_interval": "week", - "value": "performed_event_first_time", - "type": "behavioral", - }, - { - "key": "$pageview", - "event_type": "events", - "operator": "gte", - "operator_value": 2, - "time_interval": "week", - "time_value": 3, - "total_periods": 3, - "min_periods": 2, - "value": "performed_event_regularly", - "type": "behavioral", - }, - ], - } - } - ) - query_class = CohortQuery(filter=filter, team=self.team) - q, params = query_class.get_query() - self.assertFalse(query_class._restrict_event_query_by_time) - sync_execute(q, 
{**params, **filter.hogql_context.values}) - - def test_negation(self): - _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=2), - ) - - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=10), - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "time_value": 1, - "time_interval": "week", - "value": "performed_event", - "type": "behavioral", - "negation": True, - } - ], - } - } - ) - - self.assertRaises(ValueError, lambda: CohortQuery(filter=filter, team=self.team)) - - def test_negation_with_simplify_filters(self): - _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=2), - ) - - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=10), - ) - - p3 = _create_person( - team_id=self.team.pk, - distinct_ids=["p3"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$feature_flag_called", - properties={}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(days=10), - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "type": "behavioral", - "value": "performed_event", - "negation": True, - "event_type": "events", - "time_value": "30", - "time_interval": "day", - }, - { - "key": "$feature_flag_called", - "type": "behavioral", - "value": "performed_event", - "negation": False, - "event_type": "events", - "time_value": "30", - "time_interval": "day", - }, - ], - } - }, - team=self.team, - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - self.assertCountEqual([p3.uuid], [r[0] for r in res]) - - def test_negation_dynamic_time_bound_with_performed_event(self): - # invalid dude because $pageview happened too early - _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=2), - ) - - _create_event( - team=self.team, - event="$new_view", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=4), - ) - - # invalid dude because no new_view event - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=4), - ) - - # valid dude because $pageview happened a long time ago - p3 = 
_create_person( - team_id=self.team.pk, - distinct_ids=["p3"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(days=35), - ) - - _create_event( - team=self.team, - event="$new_view", - properties={}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(days=4), - ) - - # valid dude because $pageview did not happen - p4 = _create_person( - team_id=self.team.pk, - distinct_ids=["p4"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$new_view", - properties={}, - distinct_id="p4", - timestamp=datetime.now() - timedelta(days=4), - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$new_view", - "event_type": "events", - "time_value": 1, - "time_interval": "week", - "value": "performed_event", - "type": "behavioral", - }, - { - "key": "$pageview", - "event_type": "events", - "time_value": 2, - "time_interval": "week", - "value": "performed_event", - "type": "behavioral", - "negation": True, - }, - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertCountEqual([p3.uuid, p4.uuid], [r[0] for r in res]) - - def test_negation_dynamic_time_bound_with_performed_event_sequence(self): - # invalid dude because $pageview sequence happened too early - _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - # pageview sequence that happens today, and 2 days ago - _make_event_sequence(self.team, "p1", 2, [1, 1]) - _create_event( - team=self.team, - event="$new_view", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=4), - ) - - # invalid dude because no new_view event - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _make_event_sequence(self.team, "p2", 2, [1, 1]) - - # valid dude because $pageview sequence happened a long time ago - p3 = _create_person( - team_id=self.team.pk, - distinct_ids=["p3"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(days=35), - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(days=37), - ) - _create_event( - team=self.team, - event="$new_view", - properties={}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(days=4), - ) - - # valid dude because $pageview sequence did not happen - p4 = _create_person( - team_id=self.team.pk, - distinct_ids=["p4"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$new_view", - properties={}, - distinct_id="p4", - timestamp=datetime.now() - timedelta(days=4), - ) - - # valid dude because $pageview sequence did not complete, even if one pageview happened - p5 = _create_person( - team_id=self.team.pk, - distinct_ids=["p5"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p5", - timestamp=datetime.now() - timedelta(days=5), - ) - _create_event( - team=self.team, - 
event="$new_view", - properties={}, - distinct_id="p5", - timestamp=datetime.now() - timedelta(days=4), - ) - - # valid dude because $pageview sequence delay was long enough, even if it happened too early - p6 = _create_person( - team_id=self.team.pk, - distinct_ids=["p6"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - # pageview sequence that happens today, and 4 days ago - _make_event_sequence(self.team, "p6", 4, [1, 1]) - _create_event( - team=self.team, - event="$new_view", - properties={}, - distinct_id="p6", - timestamp=datetime.now() - timedelta(days=4), - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$new_view", - "event_type": "events", - "time_value": 1, - "time_interval": "week", - "value": "performed_event", - "type": "behavioral", - }, - { - "key": "$pageview", - "event_type": "events", - "time_interval": "day", - "time_value": 8, - "seq_time_interval": "day", - "seq_time_value": 3, - "seq_event": "$pageview", - "seq_event_type": "events", - "value": "performed_event_sequence", - "type": "behavioral", - "negation": True, - }, - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - self.assertCountEqual([p3.uuid, p4.uuid, p5.uuid, p6.uuid], [r[0] for r in res]) - - def test_cohort_filter(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "name": "test"}, - ) - cohort = _create_cohort( - team=self.team, - name="cohort1", - groups=[{"properties": [{"key": "name", "value": "test", "type": "person"}]}], - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [{"key": "id", "value": cohort.pk, "type": "cohort"}], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p1.uuid], [r[0] for r in res]) - - def test_faulty_type(self): - cohort = _create_cohort( - team=self.team, - name="cohort1", - groups=[ - { - "properties": [ - { - "key": "email", - "type": "event", - "value": ["fake@test.com"], - "operator": "exact", - } - ] - } - ], - ) - - self.assertEqual( - cohort.properties.to_dict(), - { - "type": "OR", - "values": [ - { - "type": "AND", - "values": [ - { - "key": "email", - "value": ["fake@test.com"], - "operator": "exact", - "type": "person", - } - ], - } - ], - }, - ) - - def test_missing_type(self): - cohort = _create_cohort( - team=self.team, - name="cohort1", - groups=[ - { - "properties": [ - { - "key": "email", - "value": ["fake@test.com"], - "operator": "exact", - } - ] - } - ], - ) - - self.assertEqual( - cohort.properties.to_dict(), - { - "type": "OR", - "values": [ - { - "type": "AND", - "values": [ - { - "key": "email", - "value": ["fake@test.com"], - "operator": "exact", - "type": "person", - } - ], - } - ], - }, - ) - - def test_old_old_style_properties(self): - cohort = _create_cohort( - team=self.team, - name="cohort1", - groups=[ - { - "properties": [ - { - "key": "email", - "value": ["fake@test.com"], - "operator": "exact", - } - ] - }, - {"properties": {"abra": "cadabra", "name": "alakazam"}}, - ], - ) - - self.assertEqual( - cohort.properties.to_dict(), - { - "type": "OR", - "values": [ - { - "type": "AND", - "values": [ - { - "key": "email", - "value": ["fake@test.com"], - "operator": "exact", - "type": "person", - } - ], - }, - { - 
"type": "AND", - "values": [ - {"key": "abra", "value": "cadabra", "type": "person"}, - {"key": "name", "value": "alakazam", "type": "person"}, - ], - }, - ], - }, - ) - - def test_precalculated_cohort_filter(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "name": "test"}, - ) - cohort = _create_cohort( - team=self.team, - name="cohort1", - groups=[{"properties": [{"key": "name", "value": "test", "type": "person"}]}], - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "OR", - "values": [ - { - "key": "id", - "value": cohort.pk, - "type": "precalculated-cohort", - } - ], - } - } - ) - - cohort.calculate_people_ch(pending_version=0) - - with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True): - q, params = CohortQuery(filter=filter, team=self.team).get_query() - # Precalculated cohorts should not be used as is - # since we want cohort calculation with cohort properties to not be out of sync - self.assertTrue("cohortpeople" not in q) - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p1.uuid], [r[0] for r in res]) - - @snapshot_clickhouse_queries - def test_precalculated_cohort_filter_with_extra_filters(self): - p1 = _create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"name": "test"}) - p2 = _create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"name": "test2"}) - _create_person(team_id=self.team.pk, distinct_ids=["p3"], properties={"name": "test3"}) - - cohort = _create_cohort( - team=self.team, - name="cohort1", - groups=[{"properties": [{"key": "name", "value": "test", "type": "person"}]}], - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "OR", - "values": [ - { - "key": "id", - "value": cohort.pk, - "type": "precalculated-cohort", - }, - {"key": "name", "value": "test2", "type": "person"}, - ], - } - } - ) - - # makes sure cohort is precalculated - cohort.calculate_people_ch(pending_version=0) - - with self.settings(USE_PRECALCULATED_CH_COHORT_PEOPLE=True): - q, params = CohortQuery(filter=filter, team=self.team).get_query() - self.assertTrue("cohortpeople" not in q) - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertCountEqual([p1.uuid, p2.uuid], [r[0] for r in res]) - - @snapshot_clickhouse_queries - def test_cohort_filter_with_extra(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "name": "test"}, - ) - cohort = _create_cohort( - team=self.team, - name="cohort1", - groups=[{"properties": [{"key": "name", "value": "test", "type": "person"}]}], - ) - - p2 = _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=2), - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - {"key": "id", "value": cohort.pk, "type": "cohort"}, - { - "key": "$pageview", - "event_type": "events", - "time_value": 1, - "time_interval": "week", - "value": "performed_event", - "type": "behavioral", - }, - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p2.uuid], [r[0] for r in res]) - - filter = Filter( - data={ - "properties": 
{ - "type": "OR", - "values": [ - {"key": "id", "value": cohort.pk, "type": "cohort"}, - { - "key": "$pageview", - "event_type": "events", - "time_value": 1, - "time_interval": "week", - "value": "performed_event", - "type": "behavioral", - }, - ], - } - }, - team=self.team, - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertCountEqual([p1.uuid, p2.uuid], [r[0] for r in res]) - - @snapshot_clickhouse_queries - def test_cohort_filter_with_another_cohort_with_event_sequence(self): - # passes filters for cohortCeption, but not main cohort - _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@gmail.com"}, - ) - _make_event_sequence(self.team, "p1", 2, [1, 1]) - - # passes filters for cohortCeption and main cohort - p2 = _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _make_event_sequence(self.team, "p2", 2, [1, 1]) - _make_event_sequence(self.team, "p2", 6, [1, 1], event="$new_view") - - # passes filters for neither cohortCeption nor main cohort - _create_person( - team_id=self.team.pk, - distinct_ids=["p3"], - properties={"email": "test@posthog.com"}, - ) - _make_event_sequence(self.team, "p3", 2, [1, 1]) - - # passes filters for mainCohort but not cohortCeption - _create_person( - team_id=self.team.pk, - distinct_ids=["p4"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _make_event_sequence(self.team, "p4", 6, [1, 1]) - _make_event_sequence(self.team, "p4", 6, [1, 1], event="$new_view") - flush_persons_and_events() - - cohort = Cohort.objects.create( - team=self.team, - name="cohortCeption", - filters={ - "properties": { - "type": "AND", - "values": [ - {"key": "name", "value": "test", "type": "person"}, - { - "key": "$pageview", - "event_type": "events", - "time_interval": "day", - "time_value": 8, - "seq_time_interval": "day", - "seq_time_value": 3, - "seq_event": "$pageview", - "seq_event_type": "events", - "value": "performed_event_sequence", - "type": "behavioral", - }, - ], - } - }, - ) - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - {"key": "id", "value": cohort.pk, "type": "cohort"}, - { - "key": "$new_view", - "event_type": "events", - "time_interval": "day", - "time_value": 8, - "seq_time_interval": "day", - "seq_time_value": 8, - "seq_event": "$new_view", - "seq_event_type": "events", - "value": "performed_event_sequence", - "type": "behavioral", - }, - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p2.uuid], [r[0] for r in res]) - - @snapshot_clickhouse_queries - def test_static_cohort_filter(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "name": "test"}, - ) - cohort = _create_cohort(team=self.team, name="cohort1", groups=[], is_static=True) - flush_persons_and_events() - cohort.insert_users_by_list(["p1"]) - - filter = Filter( - data={ - "properties": { - "type": "OR", - "values": [{"key": "id", "value": cohort.pk, "type": "static-cohort"}], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p1.uuid], [r[0] for r in res]) - - @snapshot_clickhouse_queries - def 
test_static_cohort_filter_with_extra(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "name": "test"}, - ) - cohort = _create_cohort(team=self.team, name="cohort1", groups=[], is_static=True) - - p2 = _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=2), - ) - flush_persons_and_events() - cohort.insert_users_by_list(["p1", "p2"]) - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - {"key": "id", "value": cohort.pk, "type": "cohort"}, - { - "key": "$pageview", - "event_type": "events", - "time_value": 1, - "time_interval": "week", - "value": "performed_event", - "type": "behavioral", - }, - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p2.uuid], [r[0] for r in res]) - - filter = Filter( - data={ - "properties": { - "type": "OR", - "values": [ - {"key": "id", "value": cohort.pk, "type": "cohort"}, - { - "key": "$pageview", - "event_type": "events", - "time_value": 1, - "time_interval": "week", - "value": "performed_event", - "type": "behavioral", - }, - ], - } - }, - team=self.team, - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertCountEqual([p1.uuid, p2.uuid], [r[0] for r in res]) - - @snapshot_clickhouse_queries - def test_performed_event_sequence(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - - _make_event_sequence(self.team, "p1", 2, [1, 1]) - - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=2), - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "time_interval": "day", - "time_value": 7, - "seq_time_interval": "day", - "seq_time_value": 3, - "seq_event": "$pageview", - "seq_event_type": "events", - "value": "performed_event_sequence", - "type": "behavioral", - } - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p1.uuid], [r[0] for r in res]) - - @also_test_with_materialized_columns(event_properties=["$current_url"]) - def test_performed_event_sequence_with_action(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - - action1 = Action.objects.create( - team=self.team, - name="action1", - steps_json=[ - { - "event": "$pageview", - "url": "https://posthog.com/feedback/123", - "url_matching": "exact", - } - ], - ) - - _make_event_sequence( - self.team, - "p1", - 2, - [1, 1], - properties={"$current_url": "https://posthog.com/feedback/123"}, - ) - - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - - _create_event( - team=self.team, - event="$pageview", 
- properties={"$current_url": "https://posthog.com/feedback/123"}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=2), - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": action1.pk, - "event_type": "actions", - "time_interval": "day", - "time_value": 7, - "seq_time_interval": "day", - "seq_time_value": 3, - "seq_event": action1.pk, - "seq_event_type": "actions", - "value": "performed_event_sequence", - "type": "behavioral", - } - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p1.uuid], [r[0] for r in res]) - - def test_performed_event_sequence_with_restarted(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - - _make_event_sequence(self.team, "p1", 2, [1, 1]) - - p2 = _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - - _create_event( - team=self.team, - event="$new_view", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=18), - ) - _create_event( - team=self.team, - event="$new_view", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=5), - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "OR", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "time_interval": "day", - "time_value": 7, - "seq_time_interval": "day", - "seq_time_value": 3, - "seq_event": "$pageview", - "seq_event_type": "events", - "value": "performed_event_sequence", - "type": "behavioral", - }, - { - "key": "$new_view", - "event_type": "events", - "time_value": 2, - "time_interval": "week", - "seq_time_value": 1, - "seq_time_interval": "week", - "value": "restarted_performing_event", - "type": "behavioral", - }, - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual(sorted([p1.uuid, p2.uuid]), sorted([r[0] for r in res])) - - def test_performed_event_sequence_with_extra_conditions(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - - _make_event_sequence(self.team, "p1", 2, [1, 1]) - - _create_event( - team=self.team, - event="$some_event", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=2), - ) - - _create_event( - team=self.team, - event="$some_event", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=4), - ) - - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=2), - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "time_interval": "day", - "time_value": 7, - "seq_time_interval": "day", - "seq_time_value": 3, - "seq_event": "$pageview", - "seq_event_type": "events", - "value": "performed_event_sequence", - "type": "behavioral", - }, - { - "key": "$pageview", - "event_type": "events", - "operator": "gte", - "operator_value": 1, 
- "time_value": 1, - "time_interval": "week", - "value": "performed_event_multiple", - "type": "behavioral", - }, - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p1.uuid], [r[0] for r in res]) - - @snapshot_clickhouse_queries - def test_performed_event_sequence_with_person_properties(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - - _make_event_sequence(self.team, "p1", 2, [1, 1]) - - _create_event( - team=self.team, - event="$some_event", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=2), - ) - - _create_event( - team=self.team, - event="$some_event", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=4), - ) - - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=2), - ) - - _create_person( - team_id=self.team.pk, - distinct_ids=["p3"], - properties={"name": "test22", "email": "test22@posthog.com"}, - ) - - _make_event_sequence(self.team, "p3", 2, [1, 1]) - - _create_event( - team=self.team, - event="$some_event", - properties={}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(days=2), - ) - - _create_event( - team=self.team, - event="$some_event", - properties={}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(days=4), - ) - - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "time_interval": "day", - "time_value": 7, - "seq_time_interval": "day", - "seq_time_value": 3, - "seq_event": "$pageview", - "seq_event_type": "events", - "value": "performed_event_sequence", - "type": "behavioral", - }, - { - "key": "$pageview", - "event_type": "events", - "operator": "gte", - "operator_value": 1, - "time_value": 1, - "time_interval": "week", - "value": "performed_event_multiple", - "type": "behavioral", - }, - { - "key": "email", - "value": "test@posthog.com", - "type": "person", - }, # pushed down - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p1.uuid], [r[0] for r in res]) - - def test_multiple_performed_event_sequence(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - - _make_event_sequence(self.team, "p1", 2, [1, 1]) - - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=10), - ) - - _create_event( - team=self.team, - event="$new_view", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=9), - ) - - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=10), - ) - - _create_event( - team=self.team, - event="$new_view", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=9), - ) - 
flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "time_interval": "day", - "time_value": 7, - "seq_time_interval": "day", - "seq_time_value": 3, - "seq_event": "$pageview", - "seq_event_type": "events", - "value": "performed_event_sequence", - "type": "behavioral", - }, - { - "key": "$pageview", - "event_type": "events", - "time_interval": "week", - "time_value": 2, - "seq_time_interval": "day", - "seq_time_value": 2, - "seq_event": "$new_view", - "seq_event_type": "events", - "value": "performed_event_sequence", - "type": "behavioral", - }, - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual([p1.uuid], [r[0] for r in res]) - - @snapshot_clickhouse_queries - def test_performed_event_sequence_and_clause_with_additional_event(self): - p1 = _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=6), - ) - - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=5), - ) - - p2 = _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - - _create_event( - team=self.team, - event="$new_view", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=3), - ) - flush_persons_and_events() - - filter = Filter( - data={ - "properties": { - "type": "OR", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "time_interval": "day", - "time_value": 7, - "seq_time_interval": "day", - "seq_time_value": 3, - "seq_event": "$pageview", - "seq_event_type": "events", - "value": "performed_event_sequence", - "type": "behavioral", - }, - { - "key": "$new_view", - "event_type": "events", - "operator": "gte", - "operator_value": 1, - "time_value": 1, - "time_interval": "week", - "value": "performed_event_multiple", - "type": "behavioral", - }, - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertEqual({p1.uuid, p2.uuid}, {r[0] for r in res}) - - @snapshot_clickhouse_queries - def test_unwrapping_static_cohort_filter_hidden_in_layers_of_cohorts(self): - _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test", "name": "test"}, - ) - cohort_static = _create_cohort(team=self.team, name="cohort static", groups=[], is_static=True) - - p2 = _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - _create_event( - team=self.team, - event="$pageview", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=2), - ) - - p3 = _create_person(team_id=self.team.pk, distinct_ids=["p3"], properties={"name": "test"}) - _create_event( - team=self.team, - event="$new_view", - properties={}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(days=1), - ) - - _create_person(team_id=self.team.pk, distinct_ids=["p4"], properties={"name": "test"}) - _create_event( - team=self.team, - event="$new_view", - properties={}, - distinct_id="p4", - timestamp=datetime.now() 
- timedelta(days=1), - ) - _create_person(team_id=self.team.pk, distinct_ids=["p5"], properties={"name": "test"}) - flush_persons_and_events() - cohort_static.insert_users_by_list(["p4", "p5"]) - - other_cohort = Cohort.objects.create( - team=self.team, - name="cohort other", - is_static=False, - filters={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$new_view", - "event_type": "events", - "time_interval": "day", - "time_value": 7, - "value": "performed_event", - "type": "behavioral", - # p3, p4 fits in here - }, - { - "key": "id", - "value": cohort_static.pk, - "type": "cohort", - "negation": True, - # p4, p5 fits in here - }, - ], - } - }, - ) - - filter = Filter( - data={ - "properties": { - "type": "OR", - "values": [ - { - "key": "id", - "value": other_cohort.pk, - "type": "cohort", - }, # p3 fits in here - { - "key": "$pageview", - "event_type": "events", - "time_value": 1, - "time_interval": "week", - "value": "performed_event", - "type": "behavioral", - # p2 fits in here - }, - ], - } - } - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertCountEqual([p2.uuid, p3.uuid], [r[0] for r in res]) - - def test_unwrap_with_negated_cohort(self): - _create_person( - team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test2", "email": "test@posthog.com"}, - ) - - _create_event( - team=self.team, - event="$new_view", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=6), - ) - _create_event( - team=self.team, - event="$some_event", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=6), - ) - - p2 = _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - - _create_event( - team=self.team, - event="$some_event", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=6), - ) - - _create_person( - team_id=self.team.pk, - distinct_ids=["p3"], - properties={"name": "test2", "email": "test@posthog.com"}, - ) - - _create_event( - team=self.team, - event="$some_event", - properties={}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(days=6), - ) - - cohort1 = Cohort.objects.create( - team=self.team, - name="cohort 1", - is_static=False, - filters={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$new_view", - "event_type": "events", - "time_interval": "day", - "time_value": 7, - "value": "performed_event", - "type": "behavioral", - } - ], - } - }, - ) - cohort2 = Cohort.objects.create( - team=self.team, - name="cohort 2", - is_static=False, - filters={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$some_event", - "event_type": "events", - "time_interval": "day", - "time_value": 7, - "value": "performed_event", - "type": "behavioral", - }, - { - "key": "name", - "value": "test2", - "type": "person", - "negation": True, - }, - { - "key": "id", - "value": cohort1.pk, - "type": "cohort", - "negation": True, - }, - ], - } - }, - ) - - filter = Filter( - data={ - "properties": { - "type": "OR", - "values": [{"key": "id", "value": cohort2.pk, "type": "cohort"}], # p3 fits in here - } - }, - team=self.team, - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertCountEqual([p2.uuid], [r[0] for r in res]) - - def test_unwrap_multiple_levels(self): - _create_person( - 
team_id=self.team.pk, - distinct_ids=["p1"], - properties={"name": "test2", "email": "test@posthog.com"}, - ) - - _create_event( - team=self.team, - event="$new_view", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=6), - ) - _create_event( - team=self.team, - event="$some_event", - properties={}, - distinct_id="p1", - timestamp=datetime.now() - timedelta(days=6), - ) - - _create_person( - team_id=self.team.pk, - distinct_ids=["p2"], - properties={"name": "test", "email": "test@posthog.com"}, - ) - - _create_event( - team=self.team, - event="$some_event", - properties={}, - distinct_id="p2", - timestamp=datetime.now() - timedelta(days=6), - ) - - _create_person( - team_id=self.team.pk, - distinct_ids=["p3"], - properties={"name": "test2", "email": "test@posthog.com"}, - ) - - _create_event( - team=self.team, - event="$some_event", - properties={}, - distinct_id="p3", - timestamp=datetime.now() - timedelta(days=6), - ) - - p4 = _create_person( - team_id=self.team.pk, - distinct_ids=["p4"], - properties={"name": "test3", "email": "test@posthog.com"}, - ) - - _create_event( - team=self.team, - event="$target_event", - properties={}, - distinct_id="p4", - timestamp=datetime.now() - timedelta(days=6), - ) - - cohort1 = Cohort.objects.create( - team=self.team, - name="cohort 1", - is_static=False, - filters={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$new_view", - "event_type": "events", - "time_interval": "day", - "time_value": 7, - "value": "performed_event", - "type": "behavioral", - } - ], - } - }, - ) - cohort2 = Cohort.objects.create( - team=self.team, - name="cohort 2", - is_static=False, - filters={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$some_event", - "event_type": "events", - "time_interval": "day", - "time_value": 7, - "value": "performed_event", - "type": "behavioral", - }, - { - "key": "name", - "value": "test2", - "type": "person", - "negation": True, - }, - { - "key": "id", - "value": cohort1.pk, - "type": "cohort", - "negation": True, - }, - ], - } - }, - ) - - cohort3 = Cohort.objects.create( - team=self.team, - name="cohort 3", - is_static=False, - filters={ - "properties": { - "type": "AND", - "values": [ - {"key": "name", "value": "test3", "type": "person"}, - { - "key": "id", - "value": cohort2.pk, - "type": "cohort", - "negation": True, - }, - ], - } - }, - ) - - filter = Filter( - data={ - "properties": { - "type": "OR", - "values": [{"key": "id", "value": cohort3.pk, "type": "cohort"}], - } - }, - team=self.team, - ) - - q, params = CohortQuery(filter=filter, team=self.team).get_query() - res = sync_execute(q, {**params, **filter.hogql_context.values}) - - self.assertCountEqual([p4.uuid], [r[0] for r in res]) - - -class TestCohortNegationValidation(BaseTest): - def test_basic_valid_negation_tree(self): - property_group = PropertyGroup( - type=PropertyOperatorType.AND, - values=[ - Property(key="name", value="test", type="person"), - Property(key="email", value="xxx", type="person", negation=True), - ], - ) - - has_pending_neg, has_reg = check_negation_clause(property_group) - self.assertEqual(has_pending_neg, False) - self.assertEqual(has_reg, True) - - def test_valid_negation_tree_with_extra_layers(self): - property_group = PropertyGroup( - type=PropertyOperatorType.OR, - values=[ - PropertyGroup( - type=PropertyOperatorType.AND, - values=[Property(key="name", value="test", type="person")], - ), - PropertyGroup( - type=PropertyOperatorType.AND, - values=[ - PropertyGroup( - 
type=PropertyOperatorType.OR, - values=[ - Property( - key="email", - value="xxx", - type="person", - negation=True, - ) - ], - ), - PropertyGroup( - type=PropertyOperatorType.OR, - values=[Property(key="email", value="xxx", type="person")], - ), - ], - ), - ], - ) - - has_pending_neg, has_reg = check_negation_clause(property_group) - self.assertEqual(has_pending_neg, False) - self.assertEqual(has_reg, True) - - def test_invalid_negation_tree_with_extra_layers(self): - property_group = PropertyGroup( - type=PropertyOperatorType.OR, - values=[ - PropertyGroup( - type=PropertyOperatorType.AND, - values=[Property(key="name", value="test", type="person")], - ), - PropertyGroup( - type=PropertyOperatorType.AND, - values=[ - PropertyGroup( - type=PropertyOperatorType.OR, - values=[ - Property( - key="email", - value="xxx", - type="person", - negation=True, - ) - ], - ), - PropertyGroup( - type=PropertyOperatorType.OR, - values=[ - Property( - key="email", - value="xxx", - type="person", - negation=True, - ) - ], - ), - ], - ), - ], - ) - - has_pending_neg, has_reg = check_negation_clause(property_group) - self.assertEqual(has_pending_neg, True) - self.assertEqual(has_reg, True) - - def test_valid_negation_tree_with_extra_layers_recombining_at_top(self): - property_group = PropertyGroup( - type=PropertyOperatorType.AND, # top level AND protects the 2 negations from being invalid - values=[ - PropertyGroup( - type=PropertyOperatorType.OR, - values=[Property(key="name", value="test", type="person")], - ), - PropertyGroup( - type=PropertyOperatorType.AND, - values=[ - PropertyGroup( - type=PropertyOperatorType.OR, - values=[ - Property( - key="email", - value="xxx", - type="person", - negation=True, - ) - ], - ), - PropertyGroup( - type=PropertyOperatorType.OR, - values=[ - Property( - key="email", - value="xxx", - type="person", - negation=True, - ) - ], - ), - ], - ), - ], - ) - - has_pending_neg, has_reg = check_negation_clause(property_group) - self.assertEqual(has_pending_neg, False) - self.assertEqual(has_reg, True) - - def test_invalid_negation_tree_no_positive_filter(self): - property_group = PropertyGroup( - type=PropertyOperatorType.AND, - values=[ - PropertyGroup( - type=PropertyOperatorType.OR, - values=[Property(key="name", value="test", type="person", negation=True)], - ), - PropertyGroup( - type=PropertyOperatorType.AND, - values=[ - PropertyGroup( - type=PropertyOperatorType.OR, - values=[ - Property( - key="email", - value="xxx", - type="person", - negation=True, - ) - ], - ), - PropertyGroup( - type=PropertyOperatorType.OR, - values=[ - Property( - key="email", - value="xxx", - type="person", - negation=True, - ) - ], - ), - ], - ), - ], - ) - - has_pending_neg, has_reg = check_negation_clause(property_group) - self.assertEqual(has_pending_neg, True) - self.assertEqual(has_reg, False) - - def test_empty_property_group(self): - property_group = PropertyGroup(type=PropertyOperatorType.AND, values=[]) # type: ignore - - has_pending_neg, has_reg = check_negation_clause(property_group) - self.assertEqual(has_pending_neg, False) - self.assertEqual(has_reg, False) - - def test_basic_invalid_negation_tree(self): - property_group = PropertyGroup( - type=PropertyOperatorType.AND, - values=[Property(key="email", value="xxx", type="person", negation=True)], - ) - - has_pending_neg, has_reg = check_negation_clause(property_group) - self.assertEqual(has_pending_neg, True) - self.assertEqual(has_reg, False) - - def test_basic_valid_negation_tree_with_no_negations(self): - property_group = 
PropertyGroup( - type=PropertyOperatorType.AND, - values=[Property(key="name", value="test", type="person")], - ) - - has_pending_neg, has_reg = check_negation_clause(property_group) - self.assertEqual(has_pending_neg, False) - self.assertEqual(has_reg, True) diff --git a/ee/clickhouse/queries/test/test_column_optimizer.py b/ee/clickhouse/queries/test/test_column_optimizer.py deleted file mode 100644 index 296f3d18b3..0000000000 --- a/ee/clickhouse/queries/test/test_column_optimizer.py +++ /dev/null @@ -1,260 +0,0 @@ -from ee.clickhouse.materialized_columns.columns import materialize -from ee.clickhouse.queries.column_optimizer import EnterpriseColumnOptimizer -from posthog.models import Action -from posthog.models.filters import Filter, RetentionFilter -from posthog.test.base import ( - APIBaseTest, - ClickhouseTestMixin, - cleanup_materialized_columns, -) - -PROPERTIES_OF_ALL_TYPES = [ - {"key": "event_prop", "value": ["foo", "bar"], "type": "event"}, - {"key": "person_prop", "value": "efg", "type": "person"}, - {"key": "id", "value": 1, "type": "cohort"}, - {"key": "tag_name", "value": ["label"], "operator": "exact", "type": "element"}, - { - "key": "group_prop", - "value": ["value"], - "operator": "exact", - "type": "group", - "group_type_index": 2, - }, -] - -BASE_FILTER = Filter({"events": [{"id": "$pageview", "type": "events", "order": 0}]}) -FILTER_WITH_PROPERTIES = BASE_FILTER.shallow_clone({"properties": PROPERTIES_OF_ALL_TYPES}) -FILTER_WITH_GROUPS = BASE_FILTER.shallow_clone({"properties": {"type": "AND", "values": PROPERTIES_OF_ALL_TYPES}}) - - -class TestColumnOptimizer(ClickhouseTestMixin, APIBaseTest): - def setUp(self): - super().setUp() - self.team.test_account_filters = PROPERTIES_OF_ALL_TYPES - self.team.save() - - cleanup_materialized_columns() - - def test_properties_used_in_filter(self): - properties_used_in_filter = lambda filter: EnterpriseColumnOptimizer( - filter, self.team.id - ).properties_used_in_filter - - self.assertEqual(properties_used_in_filter(BASE_FILTER), {}) - self.assertEqual( - properties_used_in_filter(FILTER_WITH_PROPERTIES), - { - ("event_prop", "event", None): 1, - ("person_prop", "person", None): 1, - ("id", "cohort", None): 1, - ("tag_name", "element", None): 1, - ("group_prop", "group", 2): 1, - }, - ) - self.assertEqual( - properties_used_in_filter(FILTER_WITH_GROUPS), - { - ("event_prop", "event", None): 1, - ("person_prop", "person", None): 1, - ("id", "cohort", None): 1, - ("tag_name", "element", None): 1, - ("group_prop", "group", 2): 1, - }, - ) - - # Breakdown cases - filter = BASE_FILTER.shallow_clone({"breakdown": "some_prop", "breakdown_type": "person"}) - self.assertEqual(properties_used_in_filter(filter), {("some_prop", "person", None): 1}) - - filter = BASE_FILTER.shallow_clone({"breakdown": "some_prop", "breakdown_type": "event"}) - self.assertEqual(properties_used_in_filter(filter), {("some_prop", "event", None): 1}) - - filter = BASE_FILTER.shallow_clone({"breakdown": [11], "breakdown_type": "cohort"}) - self.assertEqual(properties_used_in_filter(filter), {}) - - filter = BASE_FILTER.shallow_clone( - { - "breakdown": "some_prop", - "breakdown_type": "group", - "breakdown_group_type_index": 1, - } - ) - self.assertEqual(properties_used_in_filter(filter), {("some_prop", "group", 1): 1}) - - # Funnel Correlation cases - filter = BASE_FILTER.shallow_clone( - { - "funnel_correlation_type": "events", - "funnel_correlation_names": ["random_column"], - } - ) - self.assertEqual(properties_used_in_filter(filter), {}) - - filter = 
BASE_FILTER.shallow_clone( - { - "funnel_correlation_type": "properties", - "funnel_correlation_names": ["random_column", "$browser"], - } - ) - self.assertEqual( - properties_used_in_filter(filter), - {("random_column", "person", None): 1, ("$browser", "person", None): 1}, - ) - - filter = BASE_FILTER.shallow_clone( - { - "funnel_correlation_type": "properties", - "funnel_correlation_names": ["random_column", "$browser"], - "aggregation_group_type_index": 2, - } - ) - self.assertEqual( - properties_used_in_filter(filter), - {("random_column", "group", 2): 1, ("$browser", "group", 2): 1}, - ) - - filter = BASE_FILTER.shallow_clone({"funnel_correlation_type": "properties"}) - self.assertEqual(properties_used_in_filter(filter), {}) - - filter = Filter( - data={ - "events": [ - { - "id": "$pageview", - "type": "events", - "order": 0, - "math": "sum", - "math_property": "numeric_prop", - "properties": PROPERTIES_OF_ALL_TYPES, - } - ] - } - ) - self.assertEqual( - properties_used_in_filter(filter), - { - ("numeric_prop", "event", None): 1, - ("event_prop", "event", None): 1, - ("person_prop", "person", None): 1, - ("id", "cohort", None): 1, - ("tag_name", "element", None): 1, - ("group_prop", "group", 2): 1, - }, - ) - - filter = Filter( - data={ - "events": [ - { - "id": "$pageview", - "type": "events", - "order": 0, - "math": "unique_group", - "math_group_type_index": 1, - } - ] - } - ) - self.assertEqual(properties_used_in_filter(filter), {("$group_1", "event", None): 1}) - - filter = Filter( - data={ - "events": [ - { - "id": "$pageview", - "type": "events", - "order": 0, - "math": "unique_session", - } - ] - } - ) - self.assertEqual(properties_used_in_filter(filter), {("$session_id", "event", None): 1}) - - def test_properties_used_in_filter_with_actions(self): - action = Action.objects.create( - team=self.team, - steps_json=[ - { - "event": "$autocapture", - "url": "https://example.com/donate", - "url_matching": "exact", - }, - { - "event": "$autocapture", - "tag_name": "button", - "text": "Pay $10", - "properties": [{"key": "$browser", "value": "Chrome", "type": "person"}], - }, - ], - ) - - filter = Filter(data={"actions": [{"id": action.id, "math": "dau"}]}) - self.assertEqual( - EnterpriseColumnOptimizer(filter, self.team.id).properties_used_in_filter, - {("$current_url", "event", None): 1, ("$browser", "person", None): 1}, - ) - - filter = BASE_FILTER.shallow_clone({"exclusions": [{"id": action.id, "type": "actions"}]}) - self.assertEqual( - EnterpriseColumnOptimizer(filter, self.team.id).properties_used_in_filter, - {("$current_url", "event", None): 1, ("$browser", "person", None): 1}, - ) - - retention_filter = RetentionFilter(data={"target_entity": {"id": action.id, "type": "actions"}}) - self.assertEqual( - EnterpriseColumnOptimizer(retention_filter, self.team.id).properties_used_in_filter, - {("$current_url", "event", None): 2, ("$browser", "person", None): 2}, - ) - - def test_materialized_columns_checks(self): - optimizer = lambda: EnterpriseColumnOptimizer(FILTER_WITH_PROPERTIES, self.team.id) - optimizer_groups = lambda: EnterpriseColumnOptimizer(FILTER_WITH_GROUPS, self.team.id) - - self.assertEqual(optimizer().event_columns_to_query, {"properties"}) - self.assertEqual(optimizer().person_columns_to_query, {"properties"}) - self.assertEqual(optimizer_groups().event_columns_to_query, {"properties"}) - self.assertEqual(optimizer_groups().person_columns_to_query, {"properties"}) - - materialize("events", "event_prop") - materialize("person", "person_prop") - - 
self.assertEqual(optimizer().event_columns_to_query, {"mat_event_prop"}) - self.assertEqual(optimizer().person_columns_to_query, {"pmat_person_prop"}) - self.assertEqual(optimizer_groups().event_columns_to_query, {"mat_event_prop"}) - self.assertEqual(optimizer_groups().person_columns_to_query, {"pmat_person_prop"}) - - def test_materialized_columns_checks_person_on_events(self): - optimizer = lambda: EnterpriseColumnOptimizer( - BASE_FILTER.shallow_clone( - { - "properties": [ - { - "key": "person_prop", - "value": ["value"], - "operator": "exact", - "type": "person", - }, - ] - } - ), - self.team.id, - ) - - self.assertEqual(optimizer().person_on_event_columns_to_query, {"person_properties"}) - - # materialising the props on `person` table should make no difference - materialize("person", "person_prop") - - self.assertEqual(optimizer().person_on_event_columns_to_query, {"person_properties"}) - - materialize("events", "person_prop", table_column="person_properties") - - self.assertEqual(optimizer().person_on_event_columns_to_query, {"mat_pp_person_prop"}) - - def test_group_types_to_query(self): - group_types_to_query = lambda filter: EnterpriseColumnOptimizer(filter, self.team.id).group_types_to_query - - self.assertEqual(group_types_to_query(BASE_FILTER), set()) - self.assertEqual(group_types_to_query(FILTER_WITH_PROPERTIES), {2}) - self.assertEqual(group_types_to_query(FILTER_WITH_GROUPS), {2}) diff --git a/ee/clickhouse/queries/test/test_event_query.py b/ee/clickhouse/queries/test/test_event_query.py deleted file mode 100644 index b37fba0bfa..0000000000 --- a/ee/clickhouse/queries/test/test_event_query.py +++ /dev/null @@ -1,748 +0,0 @@ -from freezegun import freeze_time - -from ee.clickhouse.materialized_columns.columns import materialize -from posthog.client import sync_execute -from posthog.models import Action -from posthog.models.cohort import Cohort -from posthog.models.element import Element -from posthog.models.entity import Entity -from posthog.models.filters import Filter -from posthog.models.group.util import create_group -from posthog.models.group_type_mapping import GroupTypeMapping -from posthog.models.person import Person -from posthog.queries.trends.trends_event_query import TrendsEventQuery -from posthog.test.base import ( - APIBaseTest, - ClickhouseTestMixin, - _create_event, - _create_person, - snapshot_clickhouse_queries, -) - - -def _create_cohort(**kwargs): - team = kwargs.pop("team") - name = kwargs.pop("name") - groups = kwargs.pop("groups") - is_static = kwargs.pop("is_static", False) - cohort = Cohort.objects.create(team=team, name=name, groups=groups, is_static=is_static) - return cohort - - -class TestEventQuery(ClickhouseTestMixin, APIBaseTest): - def setUp(self): - super().setUp() - self._create_sample_data() - - def _create_sample_data(self): - distinct_id = "user_one_{}".format(self.team.pk) - _create_person(distinct_ids=[distinct_id], team=self.team) - - _create_event( - event="viewed", - distinct_id=distinct_id, - team=self.team, - timestamp="2021-05-01 00:00:00", - ) - - def _run_query(self, filter: Filter, entity=None): - entity = entity or filter.entities[0] - - query, params = TrendsEventQuery( - filter=filter, - entity=entity, - team=self.team, - person_on_events_mode=self.team.person_on_events_mode, - ).get_query() - - result = sync_execute(query, {**params, **filter.hogql_context.values}) - - return result, query - - @snapshot_clickhouse_queries - def test_basic_event_filter(self): - self._run_query( - Filter( - data={ - "date_from": 
"2021-05-01 00:00:00", - "date_to": "2021-05-07 00:00:00", - "events": [{"id": "viewed", "order": 0}], - } - ) - ) - - def test_person_properties_filter(self): - filter = Filter( - data={ - "date_from": "2021-05-01 00:00:00", - "date_to": "2021-05-07 00:00:00", - "events": [{"id": "viewed", "order": 0}], - "properties": [ - { - "key": "email", - "value": "@posthog.com", - "operator": "not_icontains", - "type": "person", - }, - {"key": "key", "value": "val"}, - ], - } - ) - - entity = Entity({"id": "viewed", "type": "events"}) - - self._run_query(filter, entity) - - entity = Entity( - { - "id": "viewed", - "type": "events", - "properties": [ - { - "key": "email", - "value": "@posthog.com", - "operator": "not_icontains", - "type": "person", - }, - {"key": "key", "value": "val"}, - ], - } - ) - - filter = Filter( - data={ - "date_from": "2021-05-01 00:00:00", - "date_to": "2021-05-07 00:00:00", - "events": [entity.to_dict()], - } - ) - - self._run_query(filter, entity) - - @snapshot_clickhouse_queries - def test_event_properties_filter(self): - filter = Filter( - data={ - "date_from": "2021-05-01 00:00:00", - "date_to": "2021-05-07 00:00:00", - "events": [{"id": "viewed", "order": 0}], - "properties": [ - { - "key": "some_key", - "value": "test_val", - "operator": "exact", - "type": "event", - } - ], - } - ) - - entity = Entity({"id": "viewed", "type": "events"}) - - self._run_query(filter, entity) - - filter = Filter( - data={ - "date_from": "2021-05-01 00:00:00", - "date_to": "2021-05-07 00:00:00", - "events": [{"id": "viewed", "order": 0}], - } - ) - - entity = Entity( - { - "id": "viewed", - "type": "events", - "properties": [ - { - "key": "some_key", - "value": "test_val", - "operator": "exact", - "type": "event", - } - ], - } - ) - - self._run_query(filter, entity) - - # just smoke test making sure query runs because no new functions are used here - @snapshot_clickhouse_queries - def test_cohort_filter(self): - cohort = _create_cohort( - team=self.team, - name="cohort1", - groups=[{"properties": [{"key": "name", "value": "test", "type": "person"}]}], - ) - - filter = Filter( - data={ - "date_from": "2021-05-01 00:00:00", - "date_to": "2021-05-07 00:00:00", - "events": [{"id": "viewed", "order": 0}], - "properties": [{"key": "id", "value": cohort.pk, "type": "cohort"}], - } - ) - - self._run_query(filter) - - # just smoke test making sure query runs because no new functions are used here - @snapshot_clickhouse_queries - def test_entity_filtered_by_cohort(self): - cohort = _create_cohort( - team=self.team, - name="cohort1", - groups=[{"properties": [{"key": "name", "value": "test", "type": "person"}]}], - ) - - filter = Filter( - data={ - "date_from": "2021-05-01 00:00:00", - "date_to": "2021-05-07 00:00:00", - "events": [ - { - "id": "$pageview", - "order": 0, - "properties": [{"key": "id", "type": "cohort", "value": cohort.pk}], - } - ], - } - ) - - Person.objects.create(team_id=self.team.pk, distinct_ids=["p1"], properties={"name": "test"}) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p1", - timestamp="2020-01-02T12:00:00Z", - ) - - Person.objects.create(team_id=self.team.pk, distinct_ids=["p2"], properties={"name": "foo"}) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p2", - timestamp="2020-01-02T12:01:00Z", - ) - - self._run_query(filter) - - # smoke test make sure query is formatted and runs - @snapshot_clickhouse_queries - def test_static_cohort_filter(self): - cohort = _create_cohort(team=self.team, name="cohort1", groups=[], 
is_static=True) - - filter = Filter( - data={ - "date_from": "2021-05-01 00:00:00", - "date_to": "2021-05-07 00:00:00", - "events": [{"id": "viewed", "order": 0}], - "properties": [{"key": "id", "value": cohort.pk, "type": "cohort"}], - }, - team=self.team, - ) - - self._run_query(filter) - - @snapshot_clickhouse_queries - @freeze_time("2021-01-21") - def test_account_filters(self): - Person.objects.create(team_id=self.team.pk, distinct_ids=["person_1"], properties={"name": "John"}) - Person.objects.create(team_id=self.team.pk, distinct_ids=["person_2"], properties={"name": "Jane"}) - - _create_event(event="event_name", team=self.team, distinct_id="person_1") - _create_event(event="event_name", team=self.team, distinct_id="person_2") - _create_event(event="event_name", team=self.team, distinct_id="person_2") - - cohort = Cohort.objects.create( - team=self.team, - name="cohort1", - groups=[{"properties": [{"key": "name", "value": "Jane", "type": "person"}]}], - ) - cohort.calculate_people_ch(pending_version=0) - - self.team.test_account_filters = [{"key": "id", "value": cohort.pk, "type": "cohort"}] - self.team.save() - - filter = Filter( - data={ - "events": [{"id": "event_name", "order": 0}], - "filter_test_accounts": True, - }, - team=self.team, - ) - - self._run_query(filter) - - def test_action_with_person_property_filter(self): - Person.objects.create(team_id=self.team.pk, distinct_ids=["person_1"], properties={"name": "John"}) - Person.objects.create(team_id=self.team.pk, distinct_ids=["person_2"], properties={"name": "Jane"}) - - _create_event(event="event_name", team=self.team, distinct_id="person_1") - _create_event(event="event_name", team=self.team, distinct_id="person_2") - _create_event(event="event_name", team=self.team, distinct_id="person_2") - - action = Action.objects.create( - team=self.team, - name="action1", - steps_json=[{"event": "event_name", "properties": [{"key": "name", "type": "person", "value": "John"}]}], - ) - - filter = Filter(data={"actions": [{"id": action.id, "type": "actions", "order": 0}]}) - - self._run_query(filter) - - @snapshot_clickhouse_queries - def test_denormalised_props(self): - filters = { - "events": [ - { - "id": "user signed up", - "type": "events", - "order": 0, - "properties": [{"key": "test_prop", "value": "hi"}], - } - ], - "date_from": "2020-01-01", - "properties": [{"key": "test_prop", "value": "hi"}], - "date_to": "2020-01-14", - } - - materialize("events", "test_prop") - - Person.objects.create(team_id=self.team.pk, distinct_ids=["p1"], properties={"key": "value"}) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p1", - timestamp="2020-01-02T12:00:00Z", - properties={"test_prop": "hi"}, - ) - - Person.objects.create(team_id=self.team.pk, distinct_ids=["p2"], properties={"key_2": "value_2"}) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p2", - timestamp="2020-01-02T12:00:00Z", - properties={"test_prop": "hi"}, - ) - - filter = Filter(data=filters) - _, query = self._run_query(filter) - self.assertIn("mat_test_prop", query) - - @snapshot_clickhouse_queries - @freeze_time("2021-01-21") - def test_element(self): - _create_event( - event="$autocapture", - team=self.team, - distinct_id="whatever", - properties={"attr": "some_other_val"}, - elements=[ - Element( - tag_name="a", - href="/a-url", - attr_class=["small"], - text="bla bla", - attributes={}, - nth_child=1, - nth_of_type=0, - ), - Element( - tag_name="button", - attr_class=["btn", "btn-primary"], - nth_child=0, - nth_of_type=0, - ), 
- Element(tag_name="div", nth_child=0, nth_of_type=0), - Element(tag_name="label", nth_child=0, nth_of_type=0, attr_id="nested"), - ], - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id="whatever", - properties={"attr": "some_val"}, - elements=[ - Element( - tag_name="a", - href="/a-url", - attr_class=["small"], - text="bla bla", - attributes={}, - nth_child=1, - nth_of_type=0, - ), - Element( - tag_name="button", - attr_class=["btn", "btn-secondary"], - nth_child=0, - nth_of_type=0, - ), - Element(tag_name="div", nth_child=0, nth_of_type=0), - Element(tag_name="img", nth_child=0, nth_of_type=0, attr_id="nested"), - ], - ) - - filter = Filter( - data={ - "events": [{"id": "event_name", "order": 0}], - "properties": [ - { - "key": "tag_name", - "value": ["label"], - "operator": "exact", - "type": "element", - } - ], - } - ) - - self._run_query(filter) - - self._run_query( - filter.shallow_clone( - { - "properties": [ - { - "key": "tag_name", - "value": [], - "operator": "exact", - "type": "element", - } - ] - } - ) - ) - - def _create_groups_test_data(self): - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="organization", group_type_index=0 - ) - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="company", group_type_index=1 - ) - - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:5", - properties={"industry": "finance"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:6", - properties={"industry": "technology"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key="company:1", - properties={"another": "value"}, - ) - - Person.objects.create(team_id=self.team.pk, distinct_ids=["p1"], properties={"$browser": "test"}) - Person.objects.create(team_id=self.team.pk, distinct_ids=["p2"], properties={"$browser": "foobar"}) - Person.objects.create(team_id=self.team.pk, distinct_ids=["p3"], properties={"$browser": "test"}) - - _create_event( - team=self.team, - event="$pageview", - distinct_id="p1", - timestamp="2020-01-02T12:00:00Z", - properties={"$group_0": "org:5", "$group_1": "company:1"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p2", - timestamp="2020-01-02T12:00:00Z", - properties={"$group_0": "org:6", "$group_1": "company:1"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p1", - timestamp="2020-01-02T12:00:00Z", - properties={"$group_0": "org:6"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p3", - timestamp="2020-01-02T12:00:00Z", - properties={"$group_0": "org:5"}, - ) - - @snapshot_clickhouse_queries - def test_groups_filters(self): - self._create_groups_test_data() - - filter = Filter( - { - "date_from": "2020-01-01T00:00:00Z", - "date_to": "2020-01-12T00:00:00Z", - "events": [{"id": "$pageview", "type": "events", "order": 0}], - "properties": [ - { - "key": "industry", - "value": "finance", - "type": "group", - "group_type_index": 0, - }, - { - "key": "another", - "value": "value", - "type": "group", - "group_type_index": 1, - }, - ], - }, - team=self.team, - ) - - results, _ = self._run_query(filter) - self.assertEqual(len(results), 1) - - @snapshot_clickhouse_queries - def test_groups_filters_mixed(self): - self._create_groups_test_data() - - filter = Filter( - { - "date_from": "2020-01-01T00:00:00Z", - "date_to": "2020-01-12T00:00:00Z", - "events": [{"id": "$pageview", "type": "events", 
"order": 0}], - "properties": [ - { - "key": "industry", - "value": "finance", - "type": "group", - "group_type_index": 0, - }, - {"key": "$browser", "value": "test", "type": "person"}, - ], - }, - team=self.team, - ) - - results, _ = self._run_query(filter) - self.assertEqual(len(results), 2) - - @snapshot_clickhouse_queries - def test_entity_filtered_by_session_duration(self): - filter = Filter( - data={ - "date_from": "2021-05-02 00:00:00", - "date_to": "2021-05-03 00:00:00", - "events": [ - { - "id": "$pageview", - "order": 0, - "properties": [ - { - "key": "$session_duration", - "type": "session", - "operator": "gt", - "value": 90, - } - ], - } - ], - } - ) - - event_timestamp_str = "2021-05-02 00:01:00" - - # Session starts before the date_from - _create_event( - team=self.team, - event="start", - distinct_id="p1", - timestamp="2021-05-01 23:59:00", - properties={"$session_id": "1abc"}, - ) - # Event that should be returned - _create_event( - team=self.team, - event="$pageview", - distinct_id="p1", - timestamp=event_timestamp_str, - properties={"$session_id": "1abc"}, - ) - - # Event in a session that's too short - _create_event( - team=self.team, - event="$pageview", - distinct_id="p2", - timestamp="2021-05-02 00:02:00", - properties={"$session_id": "2abc"}, - ) - _create_event( - team=self.team, - event="final_event", - distinct_id="p2", - timestamp="2021-05-02 00:02:01", - properties={"$session_id": "2abc"}, - ) - - # Event with no session - _create_event( - team=self.team, - event="$pageview", - distinct_id="p2", - timestamp="2021-05-02 00:02:00", - ) - - results, _ = self._run_query(filter) - self.assertEqual(len(results), 1) - self.assertEqual(results[0][0].strftime("%Y-%m-%d %H:%M:%S"), event_timestamp_str) - - @snapshot_clickhouse_queries - def test_entity_filtered_by_multiple_session_duration_filters(self): - filter = Filter( - data={ - "date_from": "2021-05-02 00:00:00", - "date_to": "2021-05-03 00:00:00", - "events": [ - { - "id": "$pageview", - "order": 0, - "properties": [ - { - "key": "$session_duration", - "type": "session", - "operator": "gt", - "value": 90, - }, - { - "key": "$session_duration", - "type": "session", - "operator": "lt", - "value": 150, - }, - ], - } - ], - } - ) - - event_timestamp_str = "2021-05-02 00:01:00" - - # 120s session - _create_event( - team=self.team, - event="start", - distinct_id="p1", - timestamp="2021-05-01 23:59:00", - properties={"$session_id": "1abc"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p1", - timestamp=event_timestamp_str, - properties={"$session_id": "1abc"}, - ) - - # 1s session (too short) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p2", - timestamp="2021-05-02 00:02:00", - properties={"$session_id": "2abc"}, - ) - _create_event( - team=self.team, - event="final_event", - distinct_id="p2", - timestamp="2021-05-02 00:02:01", - properties={"$session_id": "2abc"}, - ) - - # 600s session (too long) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p2", - timestamp="2021-05-02 00:02:00", - properties={"$session_id": "3abc"}, - ) - _create_event( - team=self.team, - event="final_event", - distinct_id="p2", - timestamp="2021-05-02 00:07:00", - properties={"$session_id": "3abc"}, - ) - - results, _ = self._run_query(filter) - self.assertEqual(len(results), 1) - self.assertEqual(results[0][0].strftime("%Y-%m-%d %H:%M:%S"), event_timestamp_str) - - @snapshot_clickhouse_queries - def test_unique_session_math_filtered_by_session_duration(self): - filter = 
Filter( - data={ - "date_from": "2021-05-02 00:00:00", - "date_to": "2021-05-03 00:00:00", - "events": [ - { - "id": "$pageview", - "math": "unique_session", - "order": 0, - "properties": [ - { - "key": "$session_duration", - "type": "session", - "operator": "gt", - "value": 30, - } - ], - } - ], - } - ) - - event_timestamp_str = "2021-05-02 00:01:00" - - # Session that should be returned - _create_event( - team=self.team, - event="start", - distinct_id="p1", - timestamp="2021-05-02 00:00:00", - properties={"$session_id": "1abc"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="p1", - timestamp=event_timestamp_str, - properties={"$session_id": "1abc"}, - ) - - # Session that's too short - _create_event( - team=self.team, - event="$pageview", - distinct_id="p2", - timestamp="2021-05-02 00:02:00", - properties={"$session_id": "2abc"}, - ) - _create_event( - team=self.team, - event="final_event", - distinct_id="p2", - timestamp="2021-05-02 00:02:01", - properties={"$session_id": "2abc"}, - ) - - results, _ = self._run_query(filter) - self.assertEqual(len(results), 1) - self.assertEqual(results[0][0].strftime("%Y-%m-%d %H:%M:%S"), event_timestamp_str) diff --git a/ee/clickhouse/queries/test/test_experiments.py b/ee/clickhouse/queries/test/test_experiments.py deleted file mode 100644 index 95aacc8ca0..0000000000 --- a/ee/clickhouse/queries/test/test_experiments.py +++ /dev/null @@ -1,235 +0,0 @@ -import json -import unittest -from ee.clickhouse.queries.experiments.funnel_experiment_result import ( - validate_event_variants as validate_funnel_event_variants, -) -from ee.clickhouse.queries.experiments.trend_experiment_result import ( - validate_event_variants as validate_trend_event_variants, -) -from rest_framework.exceptions import ValidationError - -from posthog.constants import ExperimentNoResultsErrorKeys - - -class TestFunnelExperiments(unittest.TestCase): - def test_validate_event_variants_no_events(self): - funnel_results = [] - - expected_errors = json.dumps( - { - ExperimentNoResultsErrorKeys.NO_EVENTS: True, - ExperimentNoResultsErrorKeys.NO_FLAG_INFO: True, - ExperimentNoResultsErrorKeys.NO_CONTROL_VARIANT: True, - ExperimentNoResultsErrorKeys.NO_TEST_VARIANT: True, - } - ) - - with self.assertRaises(ValidationError) as context: - validate_funnel_event_variants(funnel_results, ["test", "control"]) - - self.assertEqual(context.exception.detail[0], expected_errors) - - def test_validate_event_variants_no_control(self): - funnel_results = [ - [ - { - "action_id": "funnel-step-1", - "name": "funnel-step-1", - "order": 0, - "breakdown": ["test"], - "breakdown_value": ["test"], - }, - { - "action_id": "funnel-step-2", - "name": "funnel-step-2", - "order": 1, - "breakdown": ["test"], - "breakdown_value": ["test"], - }, - ] - ] - - expected_errors = json.dumps( - { - ExperimentNoResultsErrorKeys.NO_EVENTS: False, - ExperimentNoResultsErrorKeys.NO_FLAG_INFO: False, - ExperimentNoResultsErrorKeys.NO_CONTROL_VARIANT: True, - ExperimentNoResultsErrorKeys.NO_TEST_VARIANT: False, - } - ) - - with self.assertRaises(ValidationError) as context: - validate_funnel_event_variants(funnel_results, ["test", "control"]) - - self.assertEqual(context.exception.detail[0], expected_errors) - - def test_validate_event_variants_no_test(self): - funnel_results = [ - [ - { - "action_id": "funnel-step-1", - "name": "funnel-step-1", - "order": 0, - "breakdown": ["control"], - "breakdown_value": ["control"], - }, - { - "action_id": "funnel-step-2", - "name": "funnel-step-2", - "order": 1, - 
"breakdown": ["control"], - "breakdown_value": ["control"], - }, - ] - ] - - expected_errors = json.dumps( - { - ExperimentNoResultsErrorKeys.NO_EVENTS: False, - ExperimentNoResultsErrorKeys.NO_FLAG_INFO: False, - ExperimentNoResultsErrorKeys.NO_CONTROL_VARIANT: False, - ExperimentNoResultsErrorKeys.NO_TEST_VARIANT: True, - } - ) - - with self.assertRaises(ValidationError) as context: - validate_funnel_event_variants(funnel_results, ["test", "control"]) - - self.assertEqual(context.exception.detail[0], expected_errors) - - def test_validate_event_variants_no_flag_info(self): - funnel_results = [ - [ - { - "action_id": "funnel-step-1", - "name": "funnel-step-1", - "order": 0, - "breakdown": [""], - "breakdown_value": [""], - }, - { - "action_id": "funnel-step-2", - "name": "funnel-step-2", - "order": 1, - "breakdown": [""], - "breakdown_value": [""], - }, - ] - ] - - expected_errors = json.dumps( - { - ExperimentNoResultsErrorKeys.NO_EVENTS: False, - ExperimentNoResultsErrorKeys.NO_FLAG_INFO: True, - ExperimentNoResultsErrorKeys.NO_CONTROL_VARIANT: True, - ExperimentNoResultsErrorKeys.NO_TEST_VARIANT: True, - } - ) - - with self.assertRaises(ValidationError) as context: - validate_funnel_event_variants(funnel_results, ["test", "control"]) - - self.assertEqual(context.exception.detail[0], expected_errors) - - -class TestTrendExperiments(unittest.TestCase): - def test_validate_event_variants_no_events(self): - trend_results = [] - - expected_errors = json.dumps( - { - ExperimentNoResultsErrorKeys.NO_EVENTS: True, - ExperimentNoResultsErrorKeys.NO_FLAG_INFO: True, - ExperimentNoResultsErrorKeys.NO_CONTROL_VARIANT: True, - ExperimentNoResultsErrorKeys.NO_TEST_VARIANT: True, - } - ) - - with self.assertRaises(ValidationError) as context: - validate_trend_event_variants(trend_results, ["test", "control"]) - - self.assertEqual(context.exception.detail[0], expected_errors) - - def test_validate_event_variants_no_control(self): - trend_results = [ - { - "action": { - "id": "trend-event", - "type": "events", - "order": 0, - "name": "trend-event", - }, - "label": "test_1", - "breakdown_value": "test_1", - } - ] - - expected_errors = json.dumps( - { - ExperimentNoResultsErrorKeys.NO_EVENTS: False, - ExperimentNoResultsErrorKeys.NO_FLAG_INFO: False, - ExperimentNoResultsErrorKeys.NO_CONTROL_VARIANT: True, - ExperimentNoResultsErrorKeys.NO_TEST_VARIANT: False, - } - ) - - with self.assertRaises(ValidationError) as context: - validate_trend_event_variants(trend_results, ["control", "test_1", "test_2"]) - - self.assertEqual(context.exception.detail[0], expected_errors) - - def test_validate_event_variants_no_test(self): - trend_results = [ - { - "action": { - "id": "trend-event", - "type": "events", - "order": 0, - "name": "trend-event", - }, - "label": "control", - "breakdown_value": "control", - } - ] - - expected_errors = json.dumps( - { - ExperimentNoResultsErrorKeys.NO_EVENTS: False, - ExperimentNoResultsErrorKeys.NO_FLAG_INFO: False, - ExperimentNoResultsErrorKeys.NO_CONTROL_VARIANT: False, - ExperimentNoResultsErrorKeys.NO_TEST_VARIANT: True, - } - ) - - with self.assertRaises(ValidationError) as context: - validate_trend_event_variants(trend_results, ["control", "test_1", "test_2"]) - - self.assertEqual(context.exception.detail[0], expected_errors) - - def test_validate_event_variants_no_flag_info(self): - trend_results = [ - { - "action": { - "id": "trend-event", - "type": "events", - "order": 0, - "name": "trend-event", - }, - "label": "", - "breakdown_value": "", - } - ] - - expected_errors = 
json.dumps( - { - ExperimentNoResultsErrorKeys.NO_EVENTS: False, - ExperimentNoResultsErrorKeys.NO_FLAG_INFO: True, - ExperimentNoResultsErrorKeys.NO_CONTROL_VARIANT: True, - ExperimentNoResultsErrorKeys.NO_TEST_VARIANT: True, - } - ) - - with self.assertRaises(ValidationError) as context: - validate_trend_event_variants(trend_results, ["control", "test_1", "test_2"]) - - self.assertEqual(context.exception.detail[0], expected_errors) diff --git a/ee/clickhouse/queries/test/test_groups_join_query.py b/ee/clickhouse/queries/test/test_groups_join_query.py deleted file mode 100644 index 1564cf8f50..0000000000 --- a/ee/clickhouse/queries/test/test_groups_join_query.py +++ /dev/null @@ -1,48 +0,0 @@ -from ee.clickhouse.queries.groups_join_query import GroupsJoinQuery -from posthog.models.filters import Filter - - -def test_groups_join_query_blank(): - filter = Filter(data={"properties": []}) - - assert GroupsJoinQuery(filter, 2).get_join_query() == ("", {}) - - -def test_groups_join_query_filtering(snapshot): - filter = Filter( - data={ - "properties": [ - { - "key": "industry", - "value": "finance", - "type": "group", - "group_type_index": 0, - } - ] - } - ) - - assert GroupsJoinQuery(filter, 2).get_join_query() == snapshot - - -def test_groups_join_query_filtering_with_custom_key_names(snapshot): - filter = Filter( - data={ - "properties": [ - { - "key": "industry", - "value": "finance", - "type": "group", - "group_type_index": 0, - }, - { - "key": "company", - "value": "crashed", - "type": "group", - "group_type_index": 2, - }, - ] - } - ) - - assert GroupsJoinQuery(filter, 2, join_key="call_me_industry").get_join_query() == snapshot diff --git a/ee/clickhouse/queries/test/test_lifecycle.py b/ee/clickhouse/queries/test/test_lifecycle.py deleted file mode 100644 index f9fecbd0c5..0000000000 --- a/ee/clickhouse/queries/test/test_lifecycle.py +++ /dev/null @@ -1,298 +0,0 @@ -from datetime import datetime, timedelta - -from django.utils.timezone import now -from freezegun.api import freeze_time - -from posthog.constants import FILTER_TEST_ACCOUNTS, TRENDS_LIFECYCLE -from posthog.models.filters.filter import Filter -from posthog.models.group.util import create_group -from posthog.models.group_type_mapping import GroupTypeMapping -from posthog.models.person import Person -from posthog.queries.test.test_lifecycle import TestLifecycleBase -from posthog.queries.trends.trends import Trends -from posthog.test.base import ( - also_test_with_materialized_columns, - snapshot_clickhouse_queries, -) -from posthog.test.test_journeys import journeys_for - - -class TestClickhouseLifecycle(TestLifecycleBase): - @snapshot_clickhouse_queries - def test_test_account_filters_with_groups(self): - self.team.test_account_filters = [{"key": "key", "type": "group", "value": "value", "group_type_index": 0}] - self.team.save() - - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="organization", group_type_index=0 - ) - create_group( - self.team.pk, - group_type_index=0, - group_key="in", - properties={"key": "value"}, - ) - create_group( - self.team.pk, - group_type_index=0, - group_key="out", - properties={"key": "othervalue"}, - ) - - with freeze_time("2020-01-11T12:00:00Z"): - Person.objects.create(distinct_ids=["person1"], team_id=self.team.pk) - - with freeze_time("2020-01-09T12:00:00Z"): - Person.objects.create(distinct_ids=["person2"], team_id=self.team.pk) - - journeys_for( - { - "person1": [ - { - "event": "$pageview", - "timestamp": datetime(2020, 1, 11, 12), - 
"properties": {"$group_0": "out"}, - } - ], - "person2": [ - { - "event": "$pageview", - "timestamp": datetime(2020, 1, 9, 12), - "properties": {"$group_0": "in"}, - }, - { - "event": "$pageview", - "timestamp": datetime(2020, 1, 12, 12), - "properties": {"$group_0": "in"}, - }, - { - "event": "$pageview", - "timestamp": datetime(2020, 1, 15, 12), - "properties": {"$group_0": "in"}, - }, - ], - }, - self.team, - ) - result = Trends().run( - Filter( - data={ - "date_from": "2020-01-12T00:00:00Z", - "date_to": "2020-01-19T00:00:00Z", - "events": [{"id": "$pageview", "type": "events", "order": 0}], - "shown_as": TRENDS_LIFECYCLE, - FILTER_TEST_ACCOUNTS: True, - }, - team=self.team, - ), - self.team, - ) - - self.assertLifecycleResults( - result, - [ - {"status": "dormant", "data": [0, -1, 0, 0, -1, 0, 0, 0]}, - {"status": "new", "data": [0, 0, 0, 0, 0, 0, 0, 0]}, - {"status": "resurrecting", "data": [1, 0, 0, 1, 0, 0, 0, 0]}, - {"status": "returning", "data": [0, 0, 0, 0, 0, 0, 0, 0]}, - ], - ) - - @snapshot_clickhouse_queries - def test_lifecycle_edge_cases(self): - # This test tests behavior when created_at is different from first matching event and dormant/resurrecting/returning logic - with freeze_time("2020-01-11T12:00:00Z"): - Person.objects.create(distinct_ids=["person1"], team_id=self.team.pk) - - journeys_for( - { - "person1": [ - {"event": "$pageview", "timestamp": datetime(2020, 1, 12, 12)}, - {"event": "$pageview", "timestamp": datetime(2020, 1, 13, 12)}, - {"event": "$pageview", "timestamp": datetime(2020, 1, 15, 12)}, - {"event": "$pageview", "timestamp": datetime(2020, 1, 16, 12)}, - ] - }, - self.team, - ) - - result = Trends().run( - Filter( - data={ - "date_from": "2020-01-11T00:00:00Z", - "date_to": "2020-01-18T00:00:00Z", - "events": [{"id": "$pageview", "type": "events", "order": 0}], - "shown_as": TRENDS_LIFECYCLE, - }, - team=self.team, - ), - self.team, - ) - - self.assertLifecycleResults( - result, - [ - {"status": "dormant", "data": [0, 0, 0, -1, 0, 0, -1, 0]}, - {"status": "new", "data": [0, 0, 0, 0, 0, 0, 0, 0]}, - {"status": "resurrecting", "data": [0, 1, 0, 0, 1, 0, 0, 0]}, - {"status": "returning", "data": [0, 0, 1, 0, 0, 1, 0, 0]}, - ], - ) - - @snapshot_clickhouse_queries - def test_interval_dates_days(self): - with freeze_time("2021-05-05T12:00:00Z"): - self._setup_returning_lifecycle_data(20) - - result = self._run_lifecycle({"date_from": "-7d", "interval": "day"}) - - self.assertLifecycleResults( - result, - [ - {"status": "dormant", "data": [0] * 8}, - {"status": "new", "data": [0] * 8}, - {"status": "resurrecting", "data": [0] * 8}, - {"status": "returning", "data": [1] * 8}, - ], - ) - self.assertEqual( - result[0]["days"], - [ - "2021-04-28", - "2021-04-29", - "2021-04-30", - "2021-05-01", - "2021-05-02", - "2021-05-03", - "2021-05-04", - "2021-05-05", - ], - ) - - @snapshot_clickhouse_queries - def test_interval_dates_weeks(self): - with freeze_time("2021-05-06T12:00:00Z"): - self._setup_returning_lifecycle_data(50) - - result = self._run_lifecycle({"date_from": "-30d", "interval": "week"}) - - self.assertLifecycleResults( - result, - [ - {"status": "dormant", "data": [0] * 5}, - {"status": "new", "data": [0] * 5}, - {"status": "resurrecting", "data": [0] * 5}, - {"status": "returning", "data": [1] * 5}, - ], - ) - self.assertEqual( - result[0]["days"], - ["2021-04-05", "2021-04-12", "2021-04-19", "2021-04-26", "2021-05-03"], - ) - - @snapshot_clickhouse_queries - def test_interval_dates_months(self): - with freeze_time("2021-05-05T12:00:00Z"): - 
self._setup_returning_lifecycle_data(120) - - result = self._run_lifecycle({"date_from": "-90d", "interval": "month"}) - - self.assertLifecycleResults( - result, - [ - {"status": "dormant", "data": [0] * 4}, - {"status": "new", "data": [0] * 4}, - {"status": "resurrecting", "data": [0] * 4}, - {"status": "returning", "data": [1] * 4}, - ], - ) - self.assertEqual(result[0]["days"], ["2021-02-01", "2021-03-01", "2021-04-01", "2021-05-01"]) - - @also_test_with_materialized_columns(event_properties=["$current_url"]) - @snapshot_clickhouse_queries - def test_lifecycle_hogql_event_properties(self): - with freeze_time("2021-05-05T12:00:00Z"): - self._setup_returning_lifecycle_data(20) - result = self._run_lifecycle( - { - "date_from": "-7d", - "interval": "day", - "properties": [ - { - "key": "like(properties.$current_url, '%example%') and 'bla' != 'a%sd'", - "type": "hogql", - }, - ], - } - ) - self.assertLifecycleResults( - result, - [ - {"status": "dormant", "data": [0] * 8}, - {"status": "new", "data": [0] * 8}, - {"status": "resurrecting", "data": [0] * 8}, - {"status": "returning", "data": [1] * 8}, - ], - ) - - @also_test_with_materialized_columns(event_properties=[], person_properties=["email"]) - @snapshot_clickhouse_queries - def test_lifecycle_hogql_person_properties(self): - with freeze_time("2021-05-05T12:00:00Z"): - self._setup_returning_lifecycle_data(20) - result = self._run_lifecycle( - { - "date_from": "-7d", - "interval": "day", - "properties": [ - { - "key": "like(person.properties.email, '%test.com')", - "type": "hogql", - }, - ], - } - ) - - self.assertLifecycleResults( - result, - [ - {"status": "dormant", "data": [0] * 8}, - {"status": "new", "data": [0] * 8}, - {"status": "resurrecting", "data": [0] * 8}, - {"status": "returning", "data": [1] * 8}, - ], - ) - - def _setup_returning_lifecycle_data(self, days): - with freeze_time("2019-01-01T12:00:00Z"): - Person.objects.create( - distinct_ids=["person1"], - team_id=self.team.pk, - properties={"email": "person@test.com"}, - ) - - journeys_for( - { - "person1": [ - { - "event": "$pageview", - "timestamp": (now() - timedelta(days=n)).strftime("%Y-%m-%d %H:%M:%S.%f"), - "properties": {"$current_url": "http://example.com"}, - } - for n in range(days) - ] - }, - self.team, - create_people=False, - ) - - def _run_lifecycle(self, data): - filter = Filter( - data={ - "events": [{"id": "$pageview", "type": "events", "order": 0}], - "shown_as": TRENDS_LIFECYCLE, - **data, - }, - team=self.team, - ) - return Trends().run(filter, self.team) diff --git a/ee/clickhouse/queries/test/test_person_distinct_id_query.py b/ee/clickhouse/queries/test/test_person_distinct_id_query.py deleted file mode 100644 index 52d75ffdd3..0000000000 --- a/ee/clickhouse/queries/test/test_person_distinct_id_query.py +++ /dev/null @@ -1,5 +0,0 @@ -from posthog.queries import person_distinct_id_query - - -def test_person_distinct_id_query(db, snapshot): - assert person_distinct_id_query.get_team_distinct_ids_query(2) == snapshot diff --git a/ee/clickhouse/queries/test/test_person_query.py b/ee/clickhouse/queries/test/test_person_query.py deleted file mode 100644 index bd2a280d40..0000000000 --- a/ee/clickhouse/queries/test/test_person_query.py +++ /dev/null @@ -1,405 +0,0 @@ -import pytest - -from ee.clickhouse.materialized_columns.columns import materialize -from posthog.client import sync_execute -from posthog.models.filters import Filter -from posthog.models.team import Team -from posthog.queries.person_query import PersonQuery -from posthog.test.base import 
_create_person -from posthog.models.cohort import Cohort -from posthog.models.property import Property - - -def person_query(team: Team, filter: Filter, **kwargs): - return PersonQuery(filter, team.pk, **kwargs).get_query()[0] - - -def run_query(team: Team, filter: Filter, **kwargs): - query, params = PersonQuery(filter, team.pk, **kwargs).get_query() - rows = sync_execute(query, {**params, **filter.hogql_context.values, "team_id": team.pk}) - - if len(rows) > 0: - return {"rows": len(rows), "columns": len(rows[0])} - else: - return {"rows": 0} - - -@pytest.fixture -def testdata(db, team): - materialize("person", "email") - _create_person( - distinct_ids=["1"], - team_id=team.pk, - properties={"email": "tim@posthog.com", "$os": "windows", "$browser": "chrome"}, - ) - _create_person( - distinct_ids=["2"], - team_id=team.pk, - properties={"email": "marius@posthog.com", "$os": "Mac", "$browser": "firefox"}, - ) - _create_person( - distinct_ids=["3"], - team_id=team.pk, - properties={ - "email": "karl@example.com", - "$os": "windows", - "$browser": "mozilla", - }, - ) - - -def test_person_query(testdata, team, snapshot): - filter = Filter(data={"properties": []}) - - assert person_query(team, filter) == snapshot - assert run_query(team, filter) == {"rows": 3, "columns": 1} - - filter = Filter( - data={ - "properties": [ - {"key": "event_prop", "value": "value"}, - { - "key": "email", - "type": "person", - "value": "posthog", - "operator": "icontains", - }, - ] - } - ) - - assert person_query(team, filter) == snapshot - assert run_query(team, filter) == {"rows": 2, "columns": 1} - - -def test_person_query_with_multiple_cohorts(testdata, team, snapshot): - filter = Filter(data={"properties": []}) - - for i in range(10): - _create_person( - team_id=team.pk, - distinct_ids=[f"person{i}"], - properties={"group": i, "email": f"{i}@hey.com"}, - ) - - cohort1 = Cohort.objects.create( - team=team, - filters={ - "properties": { - "type": "OR", - "values": [ - { - "type": "OR", - "values": [ - {"key": "group", "value": "none", "type": "person"}, - {"key": "group", "value": [1, 2, 3], "type": "person"}, - ], - } - ], - } - }, - name="cohort1", - ) - - cohort2 = Cohort.objects.create( - team=team, - filters={ - "properties": { - "type": "OR", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "group", - "value": [1, 2, 3, 4, 5, 6], - "type": "person", - }, - ], - } - ], - } - }, - name="cohort2", - ) - - cohort1.calculate_people_ch(pending_version=0) - cohort2.calculate_people_ch(pending_version=0) - - cohort_filters = [ - Property(key="id", type="cohort", value=cohort1.pk), - Property(key="id", type="cohort", value=cohort2.pk), - ] - - filter = Filter( - data={ - "properties": [ - { - "key": "email", - "type": "person", - "value": "posthog", - "operator": "icontains", - }, - ] - } - ) - - filter2 = Filter( - data={ - "properties": [ - { - "key": "email", - "type": "person", - "value": "hey", - "operator": "icontains", - }, - ] - } - ) - - assert run_query(team, filter) == {"rows": 2, "columns": 1} - - # 3 rows because the intersection between cohorts 1 and 2 is person1, person2, and person3, - # with their respective group properties - assert run_query(team, filter2, cohort_filters=cohort_filters) == { - "rows": 3, - "columns": 1, - } - assert person_query(team, filter2, cohort_filters=cohort_filters) == snapshot - - -def test_person_query_with_anded_property_groups(testdata, team, snapshot): - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - {"key": "event_prop", 
"value": "value"}, - { - "key": "email", - "type": "person", - "value": "posthog", - "operator": "icontains", - }, - { - "key": "$os", - "type": "person", - "value": "windows", - "operator": "exact", - }, - { - "key": "$browser", - "type": "person", - "value": "chrome", - "operator": "exact", - }, - ], - } - } - ) - - assert person_query(team, filter) == snapshot - assert run_query(team, filter) == {"rows": 1, "columns": 1} - - -def test_person_query_with_and_and_or_property_groups(testdata, team, snapshot): - filter = Filter( - data={ - "properties": { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "email", - "type": "person", - "value": "posthog", - "operator": "icontains", - }, - { - "key": "$browser", - "type": "person", - "value": "karl", - "operator": "icontains", - }, - ], - }, - { - "type": "OR", - "values": [ - {"key": "event_prop", "value": "value"}, - { - "key": "$os", - "type": "person", - "value": "windows", - "operator": "exact", - }, # this can't be pushed down - # so person query should return only rows from the first OR group - ], - }, - ], - } - } - ) - - assert person_query(team, filter) == snapshot - assert run_query(team, filter) == {"rows": 2, "columns": 2} - - -def test_person_query_with_extra_requested_fields(testdata, team, snapshot): - filter = Filter( - data={ - "properties": [ - { - "key": "email", - "type": "person", - "value": "posthog", - "operator": "icontains", - } - ], - "breakdown": "person_prop_4326", - "breakdown_type": "person", - } - ) - - assert person_query(team, filter) == snapshot - assert run_query(team, filter) == {"rows": 2, "columns": 2} - - filter = filter.shallow_clone({"breakdown": "email", "breakdown_type": "person"}) - assert person_query(team, filter) == snapshot - assert run_query(team, filter) == {"rows": 2, "columns": 2} - - -def test_person_query_with_entity_filters(testdata, team, snapshot): - filter = Filter( - data={ - "events": [ - { - "id": "$pageview", - "properties": [ - { - "key": "email", - "type": "person", - "value": "karl", - "operator": "icontains", - } - ], - } - ] - } - ) - - assert person_query(team, filter) == snapshot - assert run_query(team, filter) == {"rows": 3, "columns": 2} - - assert person_query(team, filter, entity=filter.entities[0]) == snapshot - assert run_query(team, filter, entity=filter.entities[0]) == { - "rows": 1, - "columns": 1, - } - - -def test_person_query_with_extra_fields(testdata, team, snapshot): - filter = Filter( - data={ - "properties": [ - { - "key": "email", - "type": "person", - "value": "posthog", - "operator": "icontains", - } - ] - } - ) - - assert person_query(team, filter, extra_fields=["person_props", "pmat_email"]) == snapshot - assert run_query(team, filter, extra_fields=["person_props", "pmat_email"]) == { - "rows": 2, - "columns": 3, - } - - -def test_person_query_with_entity_filters_and_property_group_filters(testdata, team, snapshot): - filter = Filter( - data={ - "events": [ - { - "id": "$pageview", - "properties": { - "type": "OR", - "values": [ - { - "key": "email", - "type": "person", - "value": "marius", - "operator": "icontains", - }, - { - "key": "$os", - "type": "person", - "value": "windows", - "operator": "icontains", - }, - ], - }, - } - ], - "properties": { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "email", - "type": "person", - "value": "posthog", - "operator": "icontains", - }, - { - "key": "$browser", - "type": "person", - "value": "karl", - "operator": "icontains", - }, - ], - }, - { - 
"type": "OR", - "values": [ - {"key": "event_prop", "value": "value"}, - { - "key": "$os", - "type": "person", - "value": "windows", - "operator": "exact", - }, - ], - }, - ], - }, - } - ) - - assert person_query(team, filter) == snapshot - assert run_query(team, filter) == {"rows": 2, "columns": 3} - - assert person_query(team, filter, entity=filter.entities[0]) == snapshot - assert run_query(team, filter, entity=filter.entities[0]) == { - "rows": 2, - "columns": 2, - } - - -def test_person_query_with_updated_after(testdata, team, snapshot): - filter = Filter(data={"updated_after": "2023-04-04"}) - - assert person_query(team, filter) == snapshot - assert run_query(team, filter) == {"rows": 3, "columns": 1} - - filter = Filter(data={"updated_after": "2055-04-04"}) - - assert person_query(team, filter) == snapshot - assert run_query(team, filter) == {"rows": 0} diff --git a/ee/clickhouse/queries/test/test_property_optimizer.py b/ee/clickhouse/queries/test/test_property_optimizer.py deleted file mode 100644 index 907c035b64..0000000000 --- a/ee/clickhouse/queries/test/test_property_optimizer.py +++ /dev/null @@ -1,552 +0,0 @@ -import unittest - -from posthog.models.filters import Filter -from posthog.queries.property_optimizer import PropertyOptimizer - -PROPERTIES_OF_ALL_TYPES = [ - {"key": "event_prop", "value": ["foo", "bar"], "type": "event"}, - {"key": "person_prop", "value": "efg", "type": "person"}, - {"key": "id", "value": 1, "type": "cohort"}, - {"key": "tag_name", "value": ["label"], "operator": "exact", "type": "element"}, - { - "key": "group_prop", - "value": ["value"], - "operator": "exact", - "type": "group", - "group_type_index": 2, - }, -] - -BASE_FILTER = Filter({"events": [{"id": "$pageview", "type": "events", "order": 0}]}) -FILTER_WITH_GROUPS = BASE_FILTER.shallow_clone({"properties": {"type": "AND", "values": PROPERTIES_OF_ALL_TYPES}}) -TEAM_ID = 3 - - -class TestPersonPropertySelector(unittest.TestCase): - def test_basic_selector(self): - filter = BASE_FILTER.shallow_clone( - { - "properties": { - "type": "OR", - "values": [ - {"key": "person_prop", "value": "efg", "type": "person"}, - {"key": "person_prop2", "value": "efg2", "type": "person"}, - ], - } - } - ) - self.assertTrue(PropertyOptimizer.using_only_person_properties(filter.property_groups)) - - def test_multilevel_selector(self): - filter = BASE_FILTER.shallow_clone( - { - "properties": { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "event_prop2", - "value": ["foo2", "bar2"], - "type": "event", - }, - { - "key": "person_prop2", - "value": "efg2", - "type": "person", - }, - ], - }, - { - "type": "AND", - "values": [ - { - "key": "event_prop", - "value": ["foo", "bar"], - "type": "event", - }, - { - "key": "person_prop", - "value": "efg", - "type": "person", - }, - ], - }, - ], - } - } - ) - - self.assertFalse(PropertyOptimizer.using_only_person_properties(filter.property_groups)) - - def test_multilevel_selector_with_valid_OR_persons(self): - filter = BASE_FILTER.shallow_clone( - { - "properties": { - "type": "OR", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "event_prop2", - "value": ["foo2", "bar2"], - "type": "person", - }, - { - "key": "person_prop2", - "value": "efg2", - "type": "person", - }, - ], - }, - { - "type": "AND", - "values": [ - { - "key": "event_prop", - "value": ["foo", "bar"], - "type": "person", - }, - { - "key": "person_prop", - "value": "efg", - "type": "person", - }, - ], - }, - ], - } - } - ) - - 
self.assertTrue(PropertyOptimizer.using_only_person_properties(filter.property_groups)) - - -class TestPersonPushdown(unittest.TestCase): - maxDiff = None - - def test_basic_pushdowns(self): - property_groups = PropertyOptimizer().parse_property_groups(FILTER_WITH_GROUPS.property_groups) - inner = property_groups.inner - outer = property_groups.outer - - assert inner is not None - assert outer is not None - - self.assertEqual( - inner.to_dict(), - { - "type": "AND", - "values": [{"key": "person_prop", "value": "efg", "type": "person"}], - }, - ) - - self.assertEqual( - outer.to_dict(), - { - "type": "AND", - "values": [ - {"key": "event_prop", "value": ["foo", "bar"], "type": "event"}, - {"key": "id", "value": 1, "type": "cohort"}, - { - "key": "tag_name", - "value": ["label"], - "operator": "exact", - "type": "element", - }, - { - "key": "group_prop", - "value": ["value"], - "operator": "exact", - "type": "group", - "group_type_index": 2, - }, - ], - }, - ) - - def test_person_properties_mixed_with_event_properties(self): - filter = BASE_FILTER.shallow_clone( - { - "properties": { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "event_prop2", - "value": ["foo2", "bar2"], - "type": "event", - }, - { - "key": "person_prop2", - "value": "efg2", - "type": "person", - }, - ], - }, - { - "type": "AND", - "values": [ - { - "key": "event_prop", - "value": ["foo", "bar"], - "type": "event", - }, - { - "key": "person_prop", - "value": "efg", - "type": "person", - }, - ], - }, - ], - } - } - ) - - property_groups = PropertyOptimizer().parse_property_groups(filter.property_groups) - inner = property_groups.inner - outer = property_groups.outer - - assert inner is not None - assert outer is not None - - self.assertEqual( - inner.to_dict(), - { - "type": "AND", - "values": [ - { - "type": "AND", - "values": [{"key": "person_prop", "value": "efg", "type": "person"}], - } - ], - }, - ) - - self.assertEqual( - outer.to_dict(), - { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "event_prop2", - "value": ["foo2", "bar2"], - "type": "event", - }, - {"key": "person_prop2", "value": "efg2", "type": "person"}, - ], - }, - { - "type": "AND", - "values": [ - { - "key": "event_prop", - "value": ["foo", "bar"], - "type": "event", - }, - # {"key": "person_prop", "value": "efg", "type": "person", }, # this was pushed down - ], - }, - ], - }, - ) - - def test_person_properties_with_or_not_mixed_with_event_properties(self): - filter = BASE_FILTER.shallow_clone( - { - "properties": { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "person_prop2", - "value": ["foo2", "bar2"], - "type": "person", - }, - { - "key": "person_prop2", - "value": "efg2", - "type": "person", - }, - ], - }, - { - "type": "AND", - "values": [ - { - "key": "event_prop", - "value": ["foo", "bar"], - "type": "event", - }, - { - "key": "person_prop", - "value": "efg", - "type": "person", - }, - ], - }, - ], - } - } - ) - - property_groups = PropertyOptimizer().parse_property_groups(filter.property_groups) - inner = property_groups.inner - outer = property_groups.outer - - assert inner is not None - assert outer is not None - - self.assertEqual( - inner.to_dict(), - { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "person_prop2", - "value": ["foo2", "bar2"], - "type": "person", - }, - {"key": "person_prop2", "value": "efg2", "type": "person"}, - ], - }, - { - "type": "AND", - "values": [{"key": "person_prop", "value": "efg", "type": 
"person"}], - }, - ], - }, - ) - - self.assertEqual( - outer.to_dict(), - { - "type": "AND", - "values": [ - # OR group was pushed down, so not here anymore - { - "type": "AND", - "values": [ - { - "key": "event_prop", - "value": ["foo", "bar"], - "type": "event", - }, - # {"key": "person_prop", "value": "efg", "type": "person", }, # this was pushed down - ], - } - ], - }, - ) - - def test_person_properties_mixed_with_event_properties_with_misdirection_using_nested_groups(self): - filter = BASE_FILTER.shallow_clone( - { - "properties": { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "event_prop2", - "value": ["foo2", "bar2"], - "type": "event", - } - ], - } - ], - }, - { - "type": "AND", - "values": [ - { - "key": "person_prop2", - "value": "efg2", - "type": "person", - } - ], - }, - ], - } - ], - }, - { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "type": "AND", - "values": [ - { - "key": "event_prop", - "value": ["foo", "bar"], - "type": "event", - } - ], - } - ], - }, - { - "type": "OR", - "values": [ - { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "person_prop", - "value": "efg", - "type": "person", - } - ], - } - ], - } - ], - }, - ], - }, - ], - } - } - ) - - property_groups = PropertyOptimizer().parse_property_groups(filter.property_groups) - inner = property_groups.inner - outer = property_groups.outer - - assert inner is not None - assert outer is not None - - self.assertEqual( - inner.to_dict(), - { - "type": "AND", - "values": [ - { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "person_prop", - "value": "efg", - "type": "person", - } - ], - } - ], - } - ], - } - ], - } - ], - }, - ) - - self.assertEqual( - outer.to_dict(), - { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "type": "OR", - "values": [ - { - "key": "event_prop2", - "value": ["foo2", "bar2"], - "type": "event", - } - ], - } - ], - }, - { - "type": "AND", - "values": [ - { - "key": "person_prop2", - "value": "efg2", - "type": "person", - } - ], - }, - ], - } - ], - }, - { - "type": "AND", - "values": [ - { - "type": "OR", - "values": [ - { - "type": "AND", - "values": [ - { - "key": "event_prop", - "value": ["foo", "bar"], - "type": "event", - } - ], - } - ], - }, - # {"type": "OR", "values": [ - # {"type": "AND", "values": [ - # {"type": "OR", "values": [{"key": "person_prop", "value": "efg", "type": "person"}]}] - # }]} - # this was pushed down - ], - }, - ], - }, - ) - - -# TODO: add macobo-groups in mixture to tests as well diff --git a/ee/clickhouse/queries/test/test_util.py b/ee/clickhouse/queries/test/test_util.py deleted file mode 100644 index 54124fde3e..0000000000 --- a/ee/clickhouse/queries/test/test_util.py +++ /dev/null @@ -1,65 +0,0 @@ -from datetime import datetime, timedelta - -from zoneinfo import ZoneInfo -from freezegun.api import freeze_time - -from posthog.client import sync_execute -from posthog.hogql.hogql import HogQLContext -from posthog.models.action import Action -from posthog.models.cohort import Cohort -from posthog.queries.breakdown_props import _parse_breakdown_cohorts -from posthog.queries.util import get_earliest_timestamp -from posthog.test.base import _create_event - - -def 
test_get_earliest_timestamp(db, team): - with freeze_time("2021-01-21") as frozen_time: - _create_event( - team=team, - event="sign up", - distinct_id="1", - timestamp="2020-01-04T14:10:00Z", - ) - _create_event( - team=team, - event="sign up", - distinct_id="1", - timestamp="2020-01-06T14:10:00Z", - ) - - assert get_earliest_timestamp(team.id) == datetime(2020, 1, 4, 14, 10, tzinfo=ZoneInfo("UTC")) - - frozen_time.tick(timedelta(seconds=1)) - _create_event( - team=team, - event="sign up", - distinct_id="1", - timestamp="1984-01-06T14:10:00Z", - ) - _create_event( - team=team, - event="sign up", - distinct_id="1", - timestamp="2014-01-01T01:00:00Z", - ) - _create_event( - team=team, - event="sign up", - distinct_id="1", - timestamp="2015-01-01T01:00:00Z", - ) - - assert get_earliest_timestamp(team.id) == datetime(2015, 1, 1, 1, tzinfo=ZoneInfo("UTC")) - - -@freeze_time("2021-01-21") -def test_get_earliest_timestamp_with_no_events(db, team): - assert get_earliest_timestamp(team.id) == datetime(2021, 1, 14, tzinfo=ZoneInfo("UTC")) - - -def test_parse_breakdown_cohort_query(db, team): - action = Action.objects.create(team=team, name="$pageview", steps_json=[{"event": "$pageview"}]) - cohort1 = Cohort.objects.create(team=team, groups=[{"action_id": action.pk, "days": 3}], name="cohort1") - queries, params = _parse_breakdown_cohorts([cohort1], HogQLContext(team_id=team.pk)) - assert len(queries) == 1 - sync_execute(queries[0], params) diff --git a/ee/clickhouse/test/__init__.py b/ee/clickhouse/test/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/clickhouse/test/test_error.py b/ee/clickhouse/test/test_error.py deleted file mode 100644 index 983e37b145..0000000000 --- a/ee/clickhouse/test/test_error.py +++ /dev/null @@ -1,50 +0,0 @@ -import pytest -from clickhouse_driver.errors import ServerException - -from posthog.errors import wrap_query_error - - -@pytest.mark.parametrize( - "error,expected_type,expected_message,expected_code", - [ - (AttributeError("Foobar"), "AttributeError", "Foobar", None), - ( - ServerException("Estimated query execution time (34.5 seconds) is too long. Aborting query"), - "EstimatedQueryExecutionTimeTooLong", - "Estimated query execution time (34.5 seconds) is too long. Try reducing its scope by changing the time range.", - None, - ), - ( - ServerException("Syntax error", code=62), - "CHQueryErrorSyntaxError", - "Code: 62.\nSyntax error", - 62, - ), - ( - ServerException("Syntax error", code=9999), - "CHQueryErrorUnknownException", - "Code: 9999.\nSyntax error", - 9999, - ), - ( - ServerException( - "Memory limit (for query) exceeded: would use 42.00 GiB (attempt to allocate chunk of 16757643 bytes), maximum: 42.00 GiB.", - code=241, - ), - "CHQueryErrorMemoryLimitExceeded", - "Query exceeds memory limits. Try reducing its scope by changing the time range.", - 241, - ), - ( - ServerException("Too many simultaneous queries. Maximum: 100.", code=202), - "CHQueryErrorTooManySimultaneousQueries", - "Code: 202.\nToo many simultaneous queries. 
Try again later.", - 202, - ), - ], -) -def test_wrap_query_error(error, expected_type, expected_message, expected_code): - new_error = wrap_query_error(error) - assert type(new_error).__name__ == expected_type - assert str(new_error) == expected_message - assert getattr(new_error, "code", None) == expected_code diff --git a/ee/clickhouse/test/test_system_status.py b/ee/clickhouse/test/test_system_status.py deleted file mode 100644 index 80a5b692c6..0000000000 --- a/ee/clickhouse/test/test_system_status.py +++ /dev/null @@ -1,23 +0,0 @@ -def test_system_status(db): - from posthog.clickhouse.system_status import system_status - - results = list(system_status()) - assert [row["key"] for row in results] == [ - "clickhouse_alive", - "clickhouse_event_count", - "clickhouse_event_count_last_month", - "clickhouse_event_count_month_to_date", - "clickhouse_session_recordings_count_month_to_date", - "clickhouse_session_recordings_events_count_month_to_date", - "clickhouse_session_recordings_events_size_ingested", - "clickhouse_disk_0_free_space", - "clickhouse_disk_0_total_space", - "clickhouse_table_sizes", - "clickhouse_system_metrics", - "last_event_ingested_timestamp", - "dead_letter_queue_size", - "dead_letter_queue_events_last_day", - "dead_letter_queue_ratio_ok", - ] - assert len(results[9]["subrows"]["rows"]) > 0 - assert len(results[10]["subrows"]["rows"]) > 0 diff --git a/ee/clickhouse/views/__init__.py b/ee/clickhouse/views/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/clickhouse/views/experiment_holdouts.py b/ee/clickhouse/views/experiment_holdouts.py deleted file mode 100644 index c7d8eff83c..0000000000 --- a/ee/clickhouse/views/experiment_holdouts.py +++ /dev/null @@ -1,110 +0,0 @@ -from typing import Any -from rest_framework import serializers, viewsets -from rest_framework.exceptions import ValidationError -from rest_framework.request import Request -from rest_framework.response import Response -from django.db import transaction - - -from posthog.api.feature_flag import FeatureFlagSerializer -from posthog.api.routing import TeamAndOrgViewSetMixin -from posthog.api.shared import UserBasicSerializer -from posthog.models.experiment import ExperimentHoldout - - -class ExperimentHoldoutSerializer(serializers.ModelSerializer): - created_by = UserBasicSerializer(read_only=True) - - class Meta: - model = ExperimentHoldout - fields = [ - "id", - "name", - "description", - "filters", - "created_by", - "created_at", - "updated_at", - ] - read_only_fields = [ - "id", - "created_by", - "created_at", - "updated_at", - ] - - def _get_filters_with_holdout_id(self, id: int, filters: list) -> list: - variant_key = f"holdout-{id}" - updated_filters = [] - for filter in filters: - updated_filters.append( - { - **filter, - "variant": variant_key, - } - ) - return updated_filters - - def create(self, validated_data: dict, *args: Any, **kwargs: Any) -> ExperimentHoldout: - request = self.context["request"] - validated_data["created_by"] = request.user - validated_data["team_id"] = self.context["team_id"] - - if not validated_data.get("filters"): - raise ValidationError("Filters are required to create an holdout group") - - instance = super().create(validated_data) - instance.filters = self._get_filters_with_holdout_id(instance.id, instance.filters) - instance.save() - return instance - - def update(self, instance: ExperimentHoldout, validated_data): - filters = validated_data.get("filters") - if filters and instance.filters != filters: - # update flags on all experiments 
in this holdout group - new_filters = self._get_filters_with_holdout_id(instance.id, filters) - validated_data["filters"] = new_filters - with transaction.atomic(): - for experiment in instance.experiment_set.all(): - flag = experiment.feature_flag - existing_flag_serializer = FeatureFlagSerializer( - flag, - data={ - "filters": {**flag.filters, "holdout_groups": validated_data["filters"]}, - }, - partial=True, - context=self.context, - ) - existing_flag_serializer.is_valid(raise_exception=True) - existing_flag_serializer.save() - - return super().update(instance, validated_data) - - -class ExperimentHoldoutViewSet(TeamAndOrgViewSetMixin, viewsets.ModelViewSet): - scope_object = "experiment" - queryset = ExperimentHoldout.objects.prefetch_related("created_by").all() - serializer_class = ExperimentHoldoutSerializer - ordering = "-created_at" - - def destroy(self, request: Request, *args: Any, **kwargs: Any) -> Response: - instance = self.get_object() - - with transaction.atomic(): - for experiment in instance.experiment_set.all(): - flag = experiment.feature_flag - existing_flag_serializer = FeatureFlagSerializer( - flag, - data={ - "filters": { - **flag.filters, - "holdout_groups": None, - } - }, - partial=True, - context={"request": request, "team": self.team, "team_id": self.team_id}, - ) - existing_flag_serializer.is_valid(raise_exception=True) - existing_flag_serializer.save() - - return super().destroy(request, *args, **kwargs) diff --git a/ee/clickhouse/views/experiment_saved_metrics.py b/ee/clickhouse/views/experiment_saved_metrics.py deleted file mode 100644 index 911a34530c..0000000000 --- a/ee/clickhouse/views/experiment_saved_metrics.py +++ /dev/null @@ -1,85 +0,0 @@ -import pydantic -from rest_framework import serializers, viewsets -from rest_framework.exceptions import ValidationError - - -from posthog.api.routing import TeamAndOrgViewSetMixin -from posthog.api.shared import UserBasicSerializer -from posthog.models.experiment import ExperimentSavedMetric, ExperimentToSavedMetric -from posthog.schema import ExperimentFunnelsQuery, ExperimentTrendsQuery - - -class ExperimentToSavedMetricSerializer(serializers.ModelSerializer): - query = serializers.JSONField(source="saved_metric.query", read_only=True) - name = serializers.CharField(source="saved_metric.name", read_only=True) - - class Meta: - model = ExperimentToSavedMetric - fields = [ - "id", - "experiment", - "saved_metric", - "metadata", - "created_at", - "query", - "name", - ] - read_only_fields = [ - "id", - "created_at", - ] - - -class ExperimentSavedMetricSerializer(serializers.ModelSerializer): - created_by = UserBasicSerializer(read_only=True) - - class Meta: - model = ExperimentSavedMetric - fields = [ - "id", - "name", - "description", - "query", - "created_by", - "created_at", - "updated_at", - ] - read_only_fields = [ - "id", - "created_by", - "created_at", - "updated_at", - ] - - def validate_query(self, value): - if not value: - raise ValidationError("Query is required to create a saved metric") - - metric_query = value - - if metric_query.get("kind") not in ["ExperimentTrendsQuery", "ExperimentFunnelsQuery"]: - raise ValidationError("Metric query kind must be 'ExperimentTrendsQuery' or 'ExperimentFunnelsQuery'") - - # pydantic models are used to validate the query - try: - if metric_query["kind"] == "ExperimentTrendsQuery": - ExperimentTrendsQuery(**metric_query) - else: - ExperimentFunnelsQuery(**metric_query) - except pydantic.ValidationError as e: - raise ValidationError(str(e.errors())) from e - - return 
value - - def create(self, validated_data): - request = self.context["request"] - validated_data["created_by"] = request.user - validated_data["team_id"] = self.context["team_id"] - return super().create(validated_data) - - -class ExperimentSavedMetricViewSet(TeamAndOrgViewSetMixin, viewsets.ModelViewSet): - scope_object = "experiment" - queryset = ExperimentSavedMetric.objects.prefetch_related("created_by").all() - serializer_class = ExperimentSavedMetricSerializer - ordering = "-created_at" diff --git a/ee/clickhouse/views/experiments.py b/ee/clickhouse/views/experiments.py deleted file mode 100644 index a0045f96f2..0000000000 --- a/ee/clickhouse/views/experiments.py +++ /dev/null @@ -1,630 +0,0 @@ -from typing import Any, Optional -from collections.abc import Callable - -from django.utils.timezone import now -from rest_framework import serializers, viewsets -from rest_framework.exceptions import ValidationError -from rest_framework.request import Request -from rest_framework.response import Response -from statshog.defaults.django import statsd -import posthoganalytics - -from ee.clickhouse.queries.experiments.funnel_experiment_result import ( - ClickhouseFunnelExperimentResult, -) -from ee.clickhouse.queries.experiments.secondary_experiment_result import ( - ClickhouseSecondaryExperimentResult, -) -from ee.clickhouse.queries.experiments.trend_experiment_result import ( - ClickhouseTrendExperimentResult, -) -from ee.clickhouse.queries.experiments.utils import requires_flag_warning -from ee.clickhouse.views.experiment_holdouts import ExperimentHoldoutSerializer -from ee.clickhouse.views.experiment_saved_metrics import ExperimentToSavedMetricSerializer -from posthog.api.cohort import CohortSerializer -from posthog.api.feature_flag import FeatureFlagSerializer, MinimalFeatureFlagSerializer -from posthog.api.routing import TeamAndOrgViewSetMixin -from posthog.api.shared import UserBasicSerializer -from posthog.api.utils import action -from posthog.caching.insight_cache import update_cached_state -from posthog.clickhouse.query_tagging import tag_queries -from posthog.constants import INSIGHT_TRENDS -from posthog.models.experiment import Experiment, ExperimentHoldout, ExperimentSavedMetric -from posthog.models.filters.filter import Filter -from posthog.utils import generate_cache_key, get_safe_cache - -EXPERIMENT_RESULTS_CACHE_DEFAULT_TTL = 60 * 60 # 1 hour - - -def _calculate_experiment_results(experiment: Experiment, refresh: bool = False): - # :TRICKY: Don't run any filter simplification on the experiment filter yet - filter = Filter({**experiment.filters, "is_simplified": True}, team=experiment.team) - - exposure_filter_data = (experiment.parameters or {}).get("custom_exposure_filter") - exposure_filter = None - if exposure_filter_data: - exposure_filter = Filter(data={**exposure_filter_data, "is_simplified": True}, team=experiment.team) - - if filter.insight == INSIGHT_TRENDS: - calculate_func = lambda: ClickhouseTrendExperimentResult( - filter, - experiment.team, - experiment.feature_flag, - experiment.start_date, - experiment.end_date, - holdout=experiment.holdout, - custom_exposure_filter=exposure_filter, - ).get_results() - else: - calculate_func = lambda: ClickhouseFunnelExperimentResult( - filter, - experiment.team, - experiment.feature_flag, - experiment.start_date, - experiment.end_date, - holdout=experiment.holdout, - ).get_results() - - return _experiment_results_cached( - experiment, - "primary", - filter, - calculate_func, - refresh=refresh, - exposure_filter=exposure_filter, 
- ) - - -def _calculate_secondary_experiment_results(experiment: Experiment, parsed_id: int, refresh: bool = False): - filter = Filter(experiment.secondary_metrics[parsed_id]["filters"], team=experiment.team) - - calculate_func = lambda: ClickhouseSecondaryExperimentResult( - filter, - experiment.team, - experiment.feature_flag, - experiment.start_date, - experiment.end_date, - ).get_results() - return _experiment_results_cached(experiment, "secondary", filter, calculate_func, refresh=refresh) - - -def _experiment_results_cached( - experiment: Experiment, - results_type: str, - filter: Filter, - calculate_func: Callable, - refresh: bool, - exposure_filter: Optional[Filter] = None, -): - cache_filter = filter.shallow_clone( - { - "date_from": experiment.start_date, - "date_to": experiment.end_date if experiment.end_date else None, - } - ) - - exposure_suffix = "" if not exposure_filter else f"_{exposure_filter.toJSON()}" - - cache_key = generate_cache_key( - f"experiment_{results_type}_{cache_filter.toJSON()}_{experiment.team.pk}_{experiment.pk}{exposure_suffix}" - ) - - tag_queries(cache_key=cache_key) - - cached_result_package = get_safe_cache(cache_key) - - if cached_result_package and cached_result_package.get("result") and not refresh: - cached_result_package["is_cached"] = True - statsd.incr( - "posthog_cached_function_cache_hit", - tags={"route": "/projects/:id/experiments/:experiment_id/results"}, - ) - return cached_result_package - - statsd.incr( - "posthog_cached_function_cache_miss", - tags={"route": "/projects/:id/experiments/:experiment_id/results"}, - ) - - result = calculate_func() - - timestamp = now() - fresh_result_package = {"result": result, "last_refresh": now(), "is_cached": False} - - # Event to detect experiment significance flip-flopping - posthoganalytics.capture( - experiment.created_by.email, - "experiment result calculated", - properties={ - "experiment_id": experiment.id, - "name": experiment.name, - "goal_type": experiment.filters.get("insight", "FUNNELS"), - "significant": result.get("significant"), - "significance_code": result.get("significance_code"), - "probability": result.get("probability"), - }, - ) - - update_cached_state( - experiment.team.pk, - cache_key, - timestamp, - fresh_result_package, - ttl=EXPERIMENT_RESULTS_CACHE_DEFAULT_TTL, - ) - - return fresh_result_package - - -class ExperimentSerializer(serializers.ModelSerializer): - feature_flag_key = serializers.CharField(source="get_feature_flag_key") - created_by = UserBasicSerializer(read_only=True) - feature_flag = MinimalFeatureFlagSerializer(read_only=True) - holdout = ExperimentHoldoutSerializer(read_only=True) - holdout_id = serializers.PrimaryKeyRelatedField( - queryset=ExperimentHoldout.objects.all(), source="holdout", required=False, allow_null=True - ) - saved_metrics = ExperimentToSavedMetricSerializer(many=True, source="experimenttosavedmetric_set", read_only=True) - saved_metrics_ids = serializers.ListField(child=serializers.JSONField(), required=False, allow_null=True) - - class Meta: - model = Experiment - fields = [ - "id", - "name", - "description", - "start_date", - "end_date", - "feature_flag_key", - "feature_flag", - "holdout", - "holdout_id", - "exposure_cohort", - "parameters", - "secondary_metrics", - "saved_metrics", - "saved_metrics_ids", - "filters", - "archived", - "created_by", - "created_at", - "updated_at", - "type", - "metrics", - "metrics_secondary", - "stats_config", - ] - read_only_fields = [ - "id", - "created_by", - "created_at", - "updated_at", - 
"feature_flag", - "exposure_cohort", - "holdout", - "saved_metrics", - ] - - def validate_saved_metrics_ids(self, value): - if value is None: - return value - - # check value is valid json list with id and optionally metadata param - if not isinstance(value, list): - raise ValidationError("Saved metrics must be a list") - - for saved_metric in value: - if not isinstance(saved_metric, dict): - raise ValidationError("Saved metric must be an object") - if "id" not in saved_metric: - raise ValidationError("Saved metric must have an id") - if "metadata" in saved_metric and not isinstance(saved_metric["metadata"], dict): - raise ValidationError("Metadata must be an object") - - # metadata is optional, but if it exists, should have type key - # TODO: extend with other metadata keys when known - if "metadata" in saved_metric and "type" not in saved_metric["metadata"]: - raise ValidationError("Metadata must have a type key") - - # check if all saved metrics exist - saved_metrics = ExperimentSavedMetric.objects.filter(id__in=[saved_metric["id"] for saved_metric in value]) - if saved_metrics.count() != len(value): - raise ValidationError("Saved metric does not exist") - - return value - - def validate_metrics(self, value): - # TODO 2024-11-15: commented code will be addressed when persistent metrics are implemented. - - return value - - def validate_parameters(self, value): - if not value: - return value - - variants = value.get("feature_flag_variants", []) - - if len(variants) >= 21: - raise ValidationError("Feature flag variants must be less than 21") - elif len(variants) > 0: - if "control" not in [variant["key"] for variant in variants]: - raise ValidationError("Feature flag variants must contain a control variant") - - return value - - def create(self, validated_data: dict, *args: Any, **kwargs: Any) -> Experiment: - is_draft = "start_date" not in validated_data or validated_data["start_date"] is None - - # if not validated_data.get("filters") and not is_draft: - # raise ValidationError("Filters are required when creating a launched experiment") - - saved_metrics_data = validated_data.pop("saved_metrics_ids", []) - - variants = [] - aggregation_group_type_index = None - if validated_data["parameters"]: - variants = validated_data["parameters"].get("feature_flag_variants", []) - aggregation_group_type_index = validated_data["parameters"].get("aggregation_group_type_index") - - request = self.context["request"] - validated_data["created_by"] = request.user - - feature_flag_key = validated_data.pop("get_feature_flag_key") - - holdout_groups = None - if validated_data.get("holdout"): - holdout_groups = validated_data["holdout"].filters - - default_variants = [ - {"key": "control", "name": "Control Group", "rollout_percentage": 50}, - {"key": "test", "name": "Test Variant", "rollout_percentage": 50}, - ] - - feature_flag_filters = { - "groups": [{"properties": [], "rollout_percentage": 100}], - "multivariate": {"variants": variants or default_variants}, - "aggregation_group_type_index": aggregation_group_type_index, - "holdout_groups": holdout_groups, - } - - feature_flag_serializer = FeatureFlagSerializer( - data={ - "key": feature_flag_key, - "name": f'Feature Flag for Experiment {validated_data["name"]}', - "filters": feature_flag_filters, - "active": not is_draft, - "creation_context": "experiments", - }, - context=self.context, - ) - - feature_flag_serializer.is_valid(raise_exception=True) - feature_flag = feature_flag_serializer.save() - - if not validated_data.get("stats_config"): - 
validated_data["stats_config"] = {"version": 2} - - experiment = Experiment.objects.create( - team_id=self.context["team_id"], feature_flag=feature_flag, **validated_data - ) - - # if this is a web experiment, copy over the variant data to the experiment itself. - if validated_data.get("type", "") == "web": - web_variants = {} - ff_variants = variants or default_variants - - for variant in ff_variants: - web_variants[variant.get("key")] = { - "rollout_percentage": variant.get("rollout_percentage"), - } - - experiment.variants = web_variants - experiment.save() - - if saved_metrics_data: - for saved_metric_data in saved_metrics_data: - saved_metric_serializer = ExperimentToSavedMetricSerializer( - data={ - "experiment": experiment.id, - "saved_metric": saved_metric_data["id"], - "metadata": saved_metric_data.get("metadata"), - }, - context=self.context, - ) - saved_metric_serializer.is_valid(raise_exception=True) - saved_metric_serializer.save() - # TODO: Going the above route means we can still sometimes fail when validation fails? - # But this shouldn't really happen, if it does its a bug in our validation logic (validate_saved_metrics_ids) - return experiment - - def update(self, instance: Experiment, validated_data: dict, *args: Any, **kwargs: Any) -> Experiment: - # if ( - # not instance.filters.get("events") - # and not instance.filters.get("actions") - # and not instance.filters.get("data_warehouse") - # and validated_data.get("start_date") - # and not validated_data.get("filters") - # ): - # raise ValidationError("Filters are required when launching an experiment") - - update_saved_metrics = "saved_metrics_ids" in validated_data - saved_metrics_data = validated_data.pop("saved_metrics_ids", []) or [] - - # We replace all saved metrics on update to avoid issues with partial updates - if update_saved_metrics: - instance.experimenttosavedmetric_set.all().delete() - for saved_metric_data in saved_metrics_data: - saved_metric_serializer = ExperimentToSavedMetricSerializer( - data={ - "experiment": instance.id, - "saved_metric": saved_metric_data["id"], - "metadata": saved_metric_data.get("metadata"), - }, - context=self.context, - ) - saved_metric_serializer.is_valid(raise_exception=True) - saved_metric_serializer.save() - - has_start_date = validated_data.get("start_date") is not None - feature_flag = instance.feature_flag - - expected_keys = { - "name", - "description", - "start_date", - "end_date", - "filters", - "parameters", - "archived", - "secondary_metrics", - "holdout", - "metrics", - "metrics_secondary", - "stats_config", - } - given_keys = set(validated_data.keys()) - extra_keys = given_keys - expected_keys - - if feature_flag.key == validated_data.get("get_feature_flag_key"): - extra_keys.remove("get_feature_flag_key") - - if extra_keys: - raise ValidationError(f"Can't update keys: {', '.join(sorted(extra_keys))} on Experiment") - - # if an experiment has launched, we cannot edit its variants or holdout anymore. 
- if not instance.is_draft: - if "feature_flag_variants" in validated_data.get("parameters", {}): - if len(validated_data["parameters"]["feature_flag_variants"]) != len(feature_flag.variants): - raise ValidationError("Can't update feature_flag_variants on Experiment") - - for variant in validated_data["parameters"]["feature_flag_variants"]: - if ( - len([ff_variant for ff_variant in feature_flag.variants if ff_variant["key"] == variant["key"]]) - != 1 - ): - raise ValidationError("Can't update feature_flag_variants on Experiment") - if "holdout" in validated_data and validated_data["holdout"] != instance.holdout: - raise ValidationError("Can't update holdout on running Experiment") - - properties = validated_data.get("filters", {}).get("properties") - if properties: - raise ValidationError("Experiments do not support global filter properties") - - if instance.is_draft: - # if feature flag variants or holdout have changed, update the feature flag. - holdout_groups = instance.holdout.filters if instance.holdout else None - if "holdout" in validated_data: - holdout_groups = validated_data["holdout"].filters if validated_data["holdout"] else None - - if validated_data.get("parameters"): - variants = validated_data["parameters"].get("feature_flag_variants", []) - aggregation_group_type_index = validated_data["parameters"].get("aggregation_group_type_index") - - global_filters = validated_data.get("filters") - properties = [] - if global_filters: - properties = global_filters.get("properties", []) - if properties: - raise ValidationError("Experiments do not support global filter properties") - - default_variants = [ - {"key": "control", "name": "Control Group", "rollout_percentage": 50}, - {"key": "test", "name": "Test Variant", "rollout_percentage": 50}, - ] - - feature_flag_filters = { - "groups": feature_flag.filters.get("groups", []), - "multivariate": {"variants": variants or default_variants}, - "aggregation_group_type_index": aggregation_group_type_index, - "holdout_groups": holdout_groups, - } - - existing_flag_serializer = FeatureFlagSerializer( - feature_flag, - data={"filters": feature_flag_filters}, - partial=True, - context=self.context, - ) - existing_flag_serializer.is_valid(raise_exception=True) - existing_flag_serializer.save() - else: - # no parameters provided, just update the holdout if necessary - if "holdout" in validated_data: - existing_flag_serializer = FeatureFlagSerializer( - feature_flag, - data={"filters": {**feature_flag.filters, "holdout_groups": holdout_groups}}, - partial=True, - context=self.context, - ) - existing_flag_serializer.is_valid(raise_exception=True) - existing_flag_serializer.save() - - if instance.is_draft and has_start_date: - feature_flag.active = True - feature_flag.save() - return super().update(instance, validated_data) - else: - # Not a draft, doesn't have start date - # Or draft without start date - return super().update(instance, validated_data) - - -class EnterpriseExperimentsViewSet(TeamAndOrgViewSetMixin, viewsets.ModelViewSet): - scope_object = "experiment" - serializer_class = ExperimentSerializer - queryset = Experiment.objects.prefetch_related( - "feature_flag", "created_by", "holdout", "experimenttosavedmetric_set", "saved_metrics" - ).all() - ordering = "-created_at" - - # ****************************************** - # /projects/:id/experiments/:experiment_id/results - # - # Returns current results of an experiment, and graphs - # 1. Probability of success - # 2. 
Funnel breakdown graph to display - # ****************************************** - @action(methods=["GET"], detail=True, required_scopes=["experiment:read"]) - def results(self, request: Request, *args: Any, **kwargs: Any) -> Response: - experiment: Experiment = self.get_object() - - refresh = request.query_params.get("refresh") is not None - - if not experiment.filters: - raise ValidationError("Experiment has no target metric") - - result = _calculate_experiment_results(experiment, refresh) - - return Response(result) - - # ****************************************** - # /projects/:id/experiments/:experiment_id/secondary_results?id=<secondary_metric_id> - # - # Returns values for secondary experiment metrics, broken down by variants - # ****************************************** - @action(methods=["GET"], detail=True, required_scopes=["experiment:read"]) - def secondary_results(self, request: Request, *args: Any, **kwargs: Any) -> Response: - experiment: Experiment = self.get_object() - - refresh = request.query_params.get("refresh") is not None - - if not experiment.secondary_metrics: - raise ValidationError("Experiment has no secondary metrics") - - metric_id = request.query_params.get("id") - - if not metric_id: - raise ValidationError("Secondary metric id is required") - - try: - parsed_id = int(metric_id) - except ValueError: - raise ValidationError("Secondary metric id must be an integer") - - # secondary_metrics is a 0-indexed list, so the id must fall within [0, len) - if parsed_id < 0 or parsed_id >= len(experiment.secondary_metrics): - raise ValidationError("Invalid metric ID") - - result = _calculate_secondary_experiment_results(experiment, parsed_id, refresh) - - return Response(result) - - # ****************************************** - # /projects/:id/experiments/requires_flag_implementation - # - # Returns whether the filter's events still require implementing the feature flag in code, - # rather than experiment results such as a probability of success or a
Funnel breakdown graph to display - # ****************************************** - @action(methods=["GET"], detail=False, required_scopes=["experiment:read"]) - def requires_flag_implementation(self, request: Request, *args: Any, **kwargs: Any) -> Response: - filter = Filter(request=request, team=self.team).shallow_clone({"date_from": "-7d", "date_to": ""}) - - warning = requires_flag_warning(filter, self.team) - - return Response({"result": warning}) - - @action(methods=["POST"], detail=True, required_scopes=["experiment:write"]) - def create_exposure_cohort_for_experiment(self, request: Request, *args: Any, **kwargs: Any) -> Response: - experiment = self.get_object() - flag = getattr(experiment, "feature_flag", None) - if not flag: - raise ValidationError("Experiment does not have a feature flag") - - if not experiment.start_date: - raise ValidationError("Experiment does not have a start date") - - if experiment.exposure_cohort: - raise ValidationError("Experiment already has an exposure cohort") - - exposure_filter_data = (experiment.parameters or {}).get("custom_exposure_filter") - exposure_filter = None - if exposure_filter_data: - exposure_filter = Filter(data={**exposure_filter_data, "is_simplified": True}, team=experiment.team) - - target_entity: int | str = "$feature_flag_called" - target_entity_type = "events" - target_filters = [ - { - "key": "$feature_flag", - "value": [flag.key], - "operator": "exact", - "type": "event", - } - ] - - if exposure_filter: - entity = exposure_filter.entities[0] - if entity.id: - target_entity_type = entity.type if entity.type in ["events", "actions"] else "events" - target_entity = entity.id - if entity.type == "actions": - try: - target_entity = int(target_entity) - except ValueError: - raise ValidationError("Invalid action ID") - - target_filters = [ - prop.to_dict() - for prop in entity.property_groups.flat - if prop.type in ("event", "feature", "element", "hogql") - ] - - cohort_serializer = CohortSerializer( - data={ - "is_static": False, - "name": f'Users exposed to experiment "{experiment.name}"', - "is_calculating": True, - "filters": { - "properties": { - "type": "OR", - "values": [ - { - "type": "OR", - "values": [ - { - "type": "behavioral", - "value": "performed_event", - "key": target_entity, - "negation": False, - "event_type": target_entity_type, - "event_filters": target_filters, - "explicit_datetime": experiment.start_date.isoformat(), - } - ], - } - ], - } - }, - }, - context={ - "request": request, - "team": self.team, - "team_id": self.team_id, - }, - ) - - cohort_serializer.is_valid(raise_exception=True) - cohort = cohort_serializer.save() - experiment.exposure_cohort = cohort - experiment.save(update_fields=["exposure_cohort"]) - return Response({"cohort": cohort_serializer.data}, status=201) diff --git a/ee/clickhouse/views/groups.py b/ee/clickhouse/views/groups.py deleted file mode 100644 index be692dc597..0000000000 --- a/ee/clickhouse/views/groups.py +++ /dev/null @@ -1,210 +0,0 @@ -from collections import defaultdict -from typing import cast - -from django.db.models import Q -from drf_spectacular.types import OpenApiTypes -from drf_spectacular.utils import OpenApiParameter -from rest_framework import mixins, request, response, serializers, viewsets -from posthog.api.utils import action -from rest_framework.exceptions import NotFound, ValidationError -from rest_framework.pagination import CursorPagination - -from ee.clickhouse.queries.related_actors_query import RelatedActorsQuery -from posthog.api.documentation import 
extend_schema -from posthog.api.routing import TeamAndOrgViewSetMixin -from posthog.clickhouse.kafka_engine import trim_quotes_expr -from posthog.client import sync_execute -from posthog.models.group import Group -from posthog.models.group_type_mapping import GroupTypeMapping - - -class GroupTypeSerializer(serializers.ModelSerializer): - class Meta: - model = GroupTypeMapping - fields = ["group_type", "group_type_index", "name_singular", "name_plural"] - read_only_fields = ["group_type", "group_type_index"] - - -class GroupsTypesViewSet(TeamAndOrgViewSetMixin, mixins.ListModelMixin, viewsets.GenericViewSet): - scope_object = "group" - serializer_class = GroupTypeSerializer - queryset = GroupTypeMapping.objects.all().order_by("group_type_index") - pagination_class = None - sharing_enabled_actions = ["list"] - - @action(detail=False, methods=["PATCH"], name="Update group types metadata") - def update_metadata(self, request: request.Request, *args, **kwargs): - for row in cast(list[dict], request.data): - instance = GroupTypeMapping.objects.get( - project_id=self.team.project_id, group_type_index=row["group_type_index"] - ) - serializer = self.get_serializer(instance, data=row) - serializer.is_valid(raise_exception=True) - serializer.save() - - return self.list(request, *args, **kwargs) - - -class GroupCursorPagination(CursorPagination): - ordering = "-created_at" - page_size = 100 - - -class GroupSerializer(serializers.HyperlinkedModelSerializer): - class Meta: - model = Group - fields = ["group_type_index", "group_key", "group_properties", "created_at"] - - -class GroupsViewSet(TeamAndOrgViewSetMixin, mixins.ListModelMixin, viewsets.GenericViewSet): - scope_object = "group" - serializer_class = GroupSerializer - queryset = Group.objects.all() - pagination_class = GroupCursorPagination - - def safely_get_queryset(self, queryset): - return queryset.filter( - group_type_index=self.request.GET["group_type_index"], - group_key__icontains=self.request.GET.get("group_key", ""), - ) - - @extend_schema( - parameters=[ - OpenApiParameter( - "group_type_index", - OpenApiTypes.INT, - description="Specify the group type to list", - required=True, - ), - OpenApiParameter( - "search", - OpenApiTypes.STR, - description="Search the group name", - required=True, - ), - ] - ) - def list(self, request, *args, **kwargs): - """ - List all groups of a specific group type. You must pass ?group_type_index= in the URL. To get a list of valid group types, call /api/:project_id/groups_types/ - """ - if not self.request.GET.get("group_type_index"): - raise ValidationError( - { - "group_type_index": [ - "You must pass ?group_type_index= in this URL. To get a list of valid group types, call /api/:project_id/groups_types/." 
- ] - } - ) - queryset = self.filter_queryset(self.get_queryset()) - - group_search = self.request.GET.get("search") - if group_search is not None: - queryset = queryset.filter(Q(group_properties__icontains=group_search) | Q(group_key__iexact=group_search)) - - page = self.paginate_queryset(queryset) - if page is not None: - serializer = self.get_serializer(page, many=True) - return self.get_paginated_response(serializer.data) - - serializer = self.get_serializer(queryset, many=True) - return response.Response(serializer.data) - - @extend_schema( - parameters=[ - OpenApiParameter( - "group_type_index", - OpenApiTypes.INT, - description="Specify the group type to find", - required=True, - ), - OpenApiParameter( - "group_key", - OpenApiTypes.STR, - description="Specify the key of the group to find", - required=True, - ), - ] - ) - @action(methods=["GET"], detail=False) - def find(self, request: request.Request, **kw) -> response.Response: - try: - group = self.get_queryset().get(group_key=request.GET["group_key"]) - data = self.get_serializer(group).data - return response.Response(data) - except Group.DoesNotExist: - raise NotFound() - - @extend_schema( - parameters=[ - OpenApiParameter( - "group_type_index", - OpenApiTypes.INT, - description="Specify the group type to find", - required=True, - ), - OpenApiParameter( - "id", - OpenApiTypes.STR, - description="Specify the id of the user to find groups for", - required=True, - ), - ] - ) - @action(methods=["GET"], detail=False) - def related(self, request: request.Request, pk=None, **kw) -> response.Response: - group_type_index = request.GET.get("group_type_index") - id = request.GET["id"] - - results = RelatedActorsQuery(self.team, group_type_index, id).run() - return response.Response(results) - - @action(methods=["GET"], detail=False) - def property_definitions(self, request: request.Request, **kw): - rows = sync_execute( - f""" - SELECT group_type_index, tupleElement(keysAndValues, 1) as key, count(*) as count - FROM groups - ARRAY JOIN JSONExtractKeysAndValuesRaw(group_properties) as keysAndValues - WHERE team_id = %(team_id)s - GROUP BY group_type_index, tupleElement(keysAndValues, 1) - ORDER BY group_type_index ASC, count DESC, key ASC - """, - {"team_id": self.team.pk}, - ) - - group_type_index_to_properties = defaultdict(list) - for group_type_index, key, count in rows: - group_type_index_to_properties[str(group_type_index)].append({"name": key, "count": count}) - - return response.Response(group_type_index_to_properties) - - @action(methods=["GET"], detail=False) - def property_values(self, request: request.Request, **kw): - value_filter = request.GET.get("value") - - query = f""" - SELECT {trim_quotes_expr("tupleElement(keysAndValues, 2)")} as value, count(*) as count - FROM groups - ARRAY JOIN JSONExtractKeysAndValuesRaw(group_properties) as keysAndValues - WHERE team_id = %(team_id)s - AND group_type_index = %(group_type_index)s - AND tupleElement(keysAndValues, 1) = %(key)s - {f"AND {trim_quotes_expr('tupleElement(keysAndValues, 2)')} ILIKE %(value_filter)s" if value_filter else ""} - GROUP BY value - ORDER BY count DESC, value ASC - LIMIT 20 - """ - - params = { - "team_id": self.team.pk, - "group_type_index": request.GET["group_type_index"], - "key": request.GET["key"], - } - - if value_filter: - params["value_filter"] = f"%{value_filter}%" - - rows = sync_execute(query, params) - - return response.Response([{"name": name, "count": count} for name, count in rows]) diff --git a/ee/clickhouse/views/insights.py 
b/ee/clickhouse/views/insights.py deleted file mode 100644 index 529bf53e77..0000000000 --- a/ee/clickhouse/views/insights.py +++ /dev/null @@ -1,53 +0,0 @@ -from typing import Any - -from posthog.api.utils import action -from rest_framework.permissions import SAFE_METHODS, BasePermission -from rest_framework.request import Request -from rest_framework.response import Response - -from ee.clickhouse.queries.funnels.funnel_correlation import FunnelCorrelation -from ee.clickhouse.queries.stickiness import ClickhouseStickiness -from posthog.api.insight import InsightViewSet -from posthog.decorators import cached_by_filters -from posthog.models import Insight -from posthog.models.dashboard import Dashboard -from posthog.models.filters import Filter - - -class CanEditInsight(BasePermission): - message = "This insight is on a dashboard that can only be edited by its owner, team members invited to editing the dashboard, and project admins." - - def has_object_permission(self, request: Request, view, insight: Insight) -> bool: - if request.method in SAFE_METHODS: - return True - - return view.user_permissions.insight(insight).effective_privilege_level == Dashboard.PrivilegeLevel.CAN_EDIT - - -class EnterpriseInsightsViewSet(InsightViewSet): - permission_classes = [CanEditInsight] - stickiness_query_class = ClickhouseStickiness - - # ****************************************** - # /projects/:id/insights/funnel/correlation - # - # params: - # - params are the same as for funnel - # - # Returns significant events, i.e. those that are correlated with a person - # making it through a funnel - # ****************************************** - @action(methods=["GET", "POST"], url_path="funnel/correlation", detail=False) - def funnel_correlation(self, request: Request, *args: Any, **kwargs: Any) -> Response: - result = self.calculate_funnel_correlation(request) - return Response(result) - - @cached_by_filters - def calculate_funnel_correlation(self, request: Request) -> dict[str, Any]: - team = self.team - filter = Filter(request=request, team=team) - - base_uri = request.build_absolute_uri("/") - result = FunnelCorrelation(filter=filter, team=team, base_uri=base_uri).run() - - return {"result": result} diff --git a/ee/clickhouse/views/person.py b/ee/clickhouse/views/person.py deleted file mode 100644 index 750ce49809..0000000000 --- a/ee/clickhouse/views/person.py +++ /dev/null @@ -1,65 +0,0 @@ -from typing import Optional - -from rest_framework import request, response -from posthog.api.utils import action - -from ee.clickhouse.queries.funnels.funnel_correlation_persons import ( - FunnelCorrelationActors, -) -from posthog.api.person import PersonViewSet -from posthog.constants import ( - FUNNEL_CORRELATION_PERSON_LIMIT, - FUNNEL_CORRELATION_PERSON_OFFSET, - INSIGHT_FUNNELS, -) -from posthog.decorators import cached_by_filters -from posthog.models import Filter -from posthog.utils import format_query_params_absolute_url - - -class EnterprisePersonViewSet(PersonViewSet): - @action(methods=["GET", "POST"], url_path="funnel/correlation", detail=False) - def funnel_correlation(self, request: request.Request, **kwargs) -> response.Response: - if request.user.is_anonymous or not self.team: - return response.Response(data=[]) - - return self._respond_with_cached_results(self.calculate_funnel_correlation_persons(request)) - - @cached_by_filters - def calculate_funnel_correlation_persons( - self, request: request.Request - ) -> dict[str, tuple[list, Optional[str], Optional[str], int]]: - filter = 
Filter(request=request, data={"insight": INSIGHT_FUNNELS}, team=self.team) - if not filter.correlation_person_limit: - filter = filter.shallow_clone({FUNNEL_CORRELATION_PERSON_LIMIT: 100}) - base_uri = request.build_absolute_uri("/") - actors, serialized_actors, raw_count = FunnelCorrelationActors( - filter=filter, team=self.team, base_uri=base_uri - ).get_actors() - _should_paginate = raw_count >= filter.correlation_person_limit - - next_url = ( - format_query_params_absolute_url( - request, - filter.correlation_person_offset + filter.correlation_person_limit, - offset_alias=FUNNEL_CORRELATION_PERSON_OFFSET, - limit_alias=FUNNEL_CORRELATION_PERSON_LIMIT, - ) - if _should_paginate - else None - ) - initial_url = format_query_params_absolute_url(request, 0) - - # cached_function expects a dict with the key result - return { - "result": ( - serialized_actors, - next_url, - initial_url, - raw_count - len(serialized_actors), - ) - } - - -class LegacyEnterprisePersonViewSet(EnterprisePersonViewSet): - param_derived_from_user_current_team = "team_id" diff --git a/ee/clickhouse/views/test/__init__.py b/ee/clickhouse/views/test/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiment_secondary_results.ambr b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiment_secondary_results.ambr deleted file mode 100644 index 5559eacedb..0000000000 --- a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiment_secondary_results.ambr +++ /dev/null @@ -1,186 +0,0 @@ -# serializer version: 1 -# name: ClickhouseTestExperimentSecondaryResults.test_basic_secondary_metric_results - ''' - /* user_id:0 request:_snapshot_ */ - SELECT replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '') AS value, - count(*) as count - FROM events e - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - AND (has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature/a-b-test'), '^"|"$', ''))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestExperimentSecondaryResults.test_basic_secondary_metric_results.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2020-01-06 00:00:00', 'UTC')) - toIntervalDay(number) as day_start - FROM numbers(6) - UNION ALL SELECT toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['control', 'test'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT count(*) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['control', 'test']), (['control', 'test']), '$$_posthog_breakdown_other_$$') as breakdown_value - FROM events e - WHERE e.team_id = 99999 - AND event = '$pageview' - AND (has(['control', 'test'], 
replaceRegexpAll(JSONExtractRaw(e.properties, '$feature/a-b-test'), '^"|"$', ''))) - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value - ''' -# --- -# name: ClickhouseTestExperimentSecondaryResults.test_basic_secondary_metric_results.2 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT replaceRegexpAll(JSONExtractRaw(properties, '$feature_flag_response'), '^"|"$', '') AS value, - count(*) as count - FROM events e - WHERE team_id = 99999 - AND event = '$feature_flag_called' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag_response'), '^"|"$', ''))) - AND (has(['a-b-test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag'), '^"|"$', '')))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestExperimentSecondaryResults.test_basic_secondary_metric_results.3 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT [now()] AS date, - [0] AS total, - '' AS breakdown_value - LIMIT 0 - ''' -# --- -# name: ClickhouseTestExperimentSecondaryResults.test_basic_secondary_metric_results.4 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS value, - count(*) as count - FROM events e - WHERE team_id = 99999 - AND event IN ['$pageleave_funnel', '$pageview_funnel'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestExperimentSecondaryResults.test_basic_secondary_metric_results.5 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - prop - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , - if(has([['test'], ['control'], ['']], prop), prop, ['Other']) as prop - FROM - (SELECT *, - if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as 
aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = '$pageview_funnel', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = '$pageleave_funnel', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['$pageleave_funnel', '$pageview_funnel'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageleave_funnel', '$pageview_funnel'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) ))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - GROUP BY prop - ''' -# --- diff --git a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiments.ambr b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiments.ambr deleted file mode 100644 index 983cdf00b5..0000000000 --- a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiments.ambr +++ /dev/null @@ -1,1531 +0,0 @@ -# serializer version: 1 -# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results - ''' - /* user_id:0 request:_snapshot_ */ - SELECT array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS value, - count(*) as count - FROM events e - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - prop - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - 
prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , - if(has([['test'], ['control'], ['']], prop), prop, ['Other']) as prop - FROM - (SELECT *, - if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = '$pageview', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = '$pageleave', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) ))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - GROUP BY prop - ''' -# --- -# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results.2 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - prop - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , - if(has([['test'], ['control'], ['']], prop), prop, ['Other']) as prop - FROM - (SELECT *, - if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = '$pageview', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = '$pageleave', 1, 0) as step_1, 
- if(step_1 = 1, timestamp, null) as latest_1, - array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) ))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - GROUP BY prop - ''' -# --- -# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_and_events_out_of_time_range_timezones - ''' - /* user_id:0 request:_snapshot_ */ - SELECT array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS value, - count(*) as count - FROM events e - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam') - AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_and_events_out_of_time_range_timezones.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - prop - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , - if(has([['test'], ['control']], prop), prop, ['Other']) as prop - FROM - (SELECT *, - if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = '$pageview', 1, 
0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = '$pageleave', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam') - AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam') - AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam') - AND (step_0 = 1 - OR step_1 = 1) ))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - GROUP BY prop - ''' -# --- -# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_and_events_out_of_time_range_timezones.2 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - prop - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , - if(has([['test'], ['control']], prop), prop, ['Other']) as prop - FROM - (SELECT *, - if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = '$pageview', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = '$pageleave', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - 
argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam') - AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam') - AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam') - AND (step_0 = 1 - OR step_1 = 1) ))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - GROUP BY prop - ''' -# --- -# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_for_three_test_variants - ''' - /* user_id:0 request:_snapshot_ */ - SELECT array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS value, - count(*) as count - FROM events e - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_for_three_test_variants.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - prop - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , - if(has([[''], ['test_1'], ['test'], ['control'], ['unknown_3'], ['unknown_2'], ['unknown_1'], ['test_2']], prop), prop, ['Other']) as prop - FROM - (SELECT *, - if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = '$pageview', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = '$pageleave', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS prop_basic, - 
prop_basic as prop, - argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) ))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - GROUP BY prop - ''' -# --- -# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_for_three_test_variants.2 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - prop - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , - if(has([[''], ['test_1'], ['test'], ['control'], ['unknown_3'], ['unknown_2'], ['unknown_1'], ['test_2']], prop), prop, ['Other']) as prop - FROM - (SELECT *, - if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = '$pageview', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = '$pageleave', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND 
toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) ))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - GROUP BY prop - ''' -# --- -# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_with_hogql_aggregation - ''' - /* user_id:0 request:_snapshot_ */ - SELECT array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS value, - count(*) as count - FROM events e - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_with_hogql_aggregation.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - prop - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , - if(has([['test'], ['control'], ['']], prop), prop, ['Other']) as prop - FROM - (SELECT *, - if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) as prop - FROM - (SELECT e.timestamp as timestamp, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$account_id'), ''), 'null'), '^"|"$', '') as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = '$pageview', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = '$pageleave', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND 
toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) ))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - GROUP BY prop - ''' -# --- -# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_with_hogql_aggregation.2 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - prop - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , - if(has([['test'], ['control'], ['']], prop), prop, ['Other']) as prop - FROM - (SELECT *, - if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) as prop - FROM - (SELECT e.timestamp as timestamp, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$account_id'), ''), 'null'), '^"|"$', '') as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = '$pageview', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = '$pageleave', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) ))) - WHERE 
step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - GROUP BY prop - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results - ''' - /* user_id:0 request:_snapshot_ */ - SELECT replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '') AS value, - count(*) as count - FROM events e - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - AND (((isNull(replaceRegexpAll(JSONExtractRaw(e.properties, 'exclude'), '^"|"$', '')) - OR NOT JSONHas(e.properties, 'exclude'))) - AND (has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature/a-b-test'), '^"|"$', '')))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2020-01-06 00:00:00', 'UTC')) - toIntervalDay(number) as day_start - FROM numbers(6) - UNION ALL SELECT toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['test', 'control'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT count(*) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['test', 'control']), (['test', 'control']), '$$_posthog_breakdown_other_$$') as breakdown_value - FROM events e - WHERE e.team_id = 99999 - AND event = '$pageview' - AND (((isNull(replaceRegexpAll(JSONExtractRaw(e.properties, 'exclude'), '^"|"$', '')) - OR NOT JSONHas(e.properties, 'exclude'))) - AND (has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature/a-b-test'), '^"|"$', '')))) - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results.2 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT replaceRegexpAll(JSONExtractRaw(properties, '$feature_flag_response'), '^"|"$', '') AS value, - count(*) as count - FROM events e - WHERE team_id = 99999 - AND event = '$feature_flag_called' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - AND (((isNull(replaceRegexpAll(JSONExtractRaw(e.properties, 'exclude'), '^"|"$', '')) - OR NOT JSONHas(e.properties, 'exclude'))) - AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag_response'), '^"|"$', ''))) - AND (has(['a-b-test'], 
replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag'), '^"|"$', ''))))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results.3 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2020-01-06 00:00:00', 'UTC')) - toIntervalDay(number) as day_start - FROM numbers(6) - UNION ALL SELECT toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['control', 'test'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT count(DISTINCT person_id) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - breakdown_value - FROM - (SELECT person_id, - min(timestamp) as timestamp, - breakdown_value - FROM - (SELECT pdi.person_id as person_id, timestamp, transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, '$feature_flag_response'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['control', 'test']), (['control', 'test']), '$$_posthog_breakdown_other_$$') as breakdown_value - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) as pdi ON events.distinct_id = pdi.distinct_id - WHERE e.team_id = 99999 - AND event = '$feature_flag_called' - AND (((isNull(replaceRegexpAll(JSONExtractRaw(e.properties, 'exclude'), '^"|"$', '')) - OR NOT JSONHas(e.properties, 'exclude'))) - AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag_response'), '^"|"$', ''))) - AND (has(['a-b-test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag'), '^"|"$', ''))))) - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') ) - GROUP BY person_id, - breakdown_value) AS pdi - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results.4 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2020-01-06 00:00:00', 'UTC')) - toIntervalDay(number) as day_start - FROM numbers(6) - UNION ALL SELECT toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['control', 'test'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT count(DISTINCT person_id) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - breakdown_value - FROM - (SELECT person_id, - min(timestamp) 
as timestamp, - breakdown_value - FROM - (SELECT pdi.person_id as person_id, timestamp, transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, '$feature_flag_response'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['control', 'test']), (['control', 'test']), '$$_posthog_breakdown_other_$$') as breakdown_value - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) as pdi ON events.distinct_id = pdi.distinct_id - WHERE e.team_id = 99999 - AND event = '$feature_flag_called' - AND (((isNull(replaceRegexpAll(JSONExtractRaw(e.properties, 'exclude'), '^"|"$', '')) - OR NOT JSONHas(e.properties, 'exclude'))) - AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag_response'), '^"|"$', ''))) - AND (has(['a-b-test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag'), '^"|"$', ''))))) - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') ) - GROUP BY person_id, - breakdown_value) AS pdi - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_for_three_test_variants - ''' - /* user_id:0 request:_snapshot_ */ - SELECT replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '') AS value, - count(*) as count - FROM events e - WHERE team_id = 99999 - AND event = '$pageview1' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - AND (has(['control', 'test_1', 'test_2', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature/a-b-test'), '^"|"$', ''))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_for_three_test_variants.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2020-01-06 00:00:00', 'UTC')) - toIntervalDay(number) as day_start - FROM numbers(6) - UNION ALL SELECT toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['control', 'test_1', 'test_2'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT count(*) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['control', 'test_1', 'test_2']), (['control', 'test_1', 'test_2']), '$$_posthog_breakdown_other_$$') as breakdown_value - FROM events e - WHERE e.team_id = 99999 - AND event = '$pageview1' - AND (has(['control', 'test_1', 'test_2', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature/a-b-test'), '^"|"$', ''))) - AND 
toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_for_three_test_variants.2 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT replaceRegexpAll(JSONExtractRaw(properties, '$feature_flag_response'), '^"|"$', '') AS value, - count(*) as count - FROM events e - WHERE team_id = 99999 - AND event = '$feature_flag_called' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - AND ((has(['control', 'test_1', 'test_2', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag_response'), '^"|"$', ''))) - AND (has(['a-b-test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag'), '^"|"$', '')))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_for_three_test_variants.3 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT [now()] AS date, - [0] AS total, - '' AS breakdown_value - LIMIT 0 - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_for_three_test_variants.4 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT [now()] AS date, - [0] AS total, - '' AS breakdown_value - LIMIT 0 - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_out_of_timerange_timezone - ''' - /* user_id:0 request:_snapshot_ */ - SELECT replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '') AS value, - count(*) as count - FROM events e - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'US/Pacific') >= toDateTime('2020-01-01 02:10:00', 'US/Pacific') - AND toTimeZone(timestamp, 'US/Pacific') <= toDateTime('2020-01-06 07:00:00', 'US/Pacific') - AND (has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature/a-b-test'), '^"|"$', ''))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_out_of_timerange_timezone.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2020-01-06 07:00:00', 'US/Pacific')) - toIntervalDay(number) as day_start - FROM numbers(6) - UNION ALL SELECT toStartOfDay(toDateTime('2020-01-01 02:10:00', 'US/Pacific')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['test', 'control'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT count(*) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'US/Pacific')) as day_start, - transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['test', 'control']), (['test', 'control']), 
'$$_posthog_breakdown_other_$$') as breakdown_value - FROM events e - WHERE e.team_id = 99999 - AND event = '$pageview' - AND (has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature/a-b-test'), '^"|"$', ''))) - AND toTimeZone(timestamp, 'US/Pacific') >= toDateTime('2020-01-01 02:10:00', 'US/Pacific') - AND toTimeZone(timestamp, 'US/Pacific') <= toDateTime('2020-01-06 07:00:00', 'US/Pacific') - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_out_of_timerange_timezone.2 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT replaceRegexpAll(JSONExtractRaw(properties, '$feature_flag_response'), '^"|"$', '') AS value, - count(*) as count - FROM events e - WHERE team_id = 99999 - AND event = '$feature_flag_called' - AND toTimeZone(timestamp, 'US/Pacific') >= toDateTime('2020-01-01 02:10:00', 'US/Pacific') - AND toTimeZone(timestamp, 'US/Pacific') <= toDateTime('2020-01-06 07:00:00', 'US/Pacific') - AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag_response'), '^"|"$', ''))) - AND (has(['a-b-test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag'), '^"|"$', '')))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_out_of_timerange_timezone.3 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2020-01-06 07:00:00', 'US/Pacific')) - toIntervalDay(number) as day_start - FROM numbers(6) - UNION ALL SELECT toStartOfDay(toDateTime('2020-01-01 02:10:00', 'US/Pacific')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['control', 'test'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT count(DISTINCT person_id) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'US/Pacific')) as day_start, - breakdown_value - FROM - (SELECT person_id, - min(timestamp) as timestamp, - breakdown_value - FROM - (SELECT pdi.person_id as person_id, timestamp, transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, '$feature_flag_response'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['control', 'test']), (['control', 'test']), '$$_posthog_breakdown_other_$$') as breakdown_value - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) as pdi ON events.distinct_id = pdi.distinct_id - WHERE e.team_id = 99999 - AND event = '$feature_flag_called' - AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag_response'), '^"|"$', ''))) - AND (has(['a-b-test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag'), '^"|"$', '')))) - AND toTimeZone(timestamp, 'US/Pacific') >= toDateTime('2020-01-01 02:10:00', 'US/Pacific') - AND toTimeZone(timestamp, 'US/Pacific') <= toDateTime('2020-01-06 07:00:00', 
'US/Pacific') ) - GROUP BY person_id, - breakdown_value) AS pdi - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_out_of_timerange_timezone.4 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2020-01-06 07:00:00', 'US/Pacific')) - toIntervalDay(number) as day_start - FROM numbers(6) - UNION ALL SELECT toStartOfDay(toDateTime('2020-01-01 02:10:00', 'US/Pacific')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['control', 'test'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT count(DISTINCT person_id) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'US/Pacific')) as day_start, - breakdown_value - FROM - (SELECT person_id, - min(timestamp) as timestamp, - breakdown_value - FROM - (SELECT pdi.person_id as person_id, timestamp, transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, '$feature_flag_response'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['control', 'test']), (['control', 'test']), '$$_posthog_breakdown_other_$$') as breakdown_value - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) as pdi ON events.distinct_id = pdi.distinct_id - WHERE e.team_id = 99999 - AND event = '$feature_flag_called' - AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag_response'), '^"|"$', ''))) - AND (has(['a-b-test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag'), '^"|"$', '')))) - AND toTimeZone(timestamp, 'US/Pacific') >= toDateTime('2020-01-01 02:10:00', 'US/Pacific') - AND toTimeZone(timestamp, 'US/Pacific') <= toDateTime('2020-01-06 07:00:00', 'US/Pacific') ) - GROUP BY person_id, - breakdown_value) AS pdi - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_with_hogql_filter - ''' - /* user_id:0 request:_snapshot_ */ - SELECT replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '') AS value, - count(*) as count - FROM events e - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature/a-b-test'), '^"|"$', ''))) - AND (ifNull(ilike(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, 'hogql'), ''), 'null'), '^"|"$', ''), 'true'), 0))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_with_hogql_filter.1 - ''' - /* user_id:0 
request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2020-01-06 00:00:00', 'UTC')) - toIntervalDay(number) as day_start - FROM numbers(6) - UNION ALL SELECT toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['test', 'control'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT count(*) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['test', 'control']), (['test', 'control']), '$$_posthog_breakdown_other_$$') as breakdown_value - FROM events e - WHERE e.team_id = 99999 - AND event = '$pageview' - AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature/a-b-test'), '^"|"$', ''))) - AND (ifNull(ilike(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, 'hogql'), ''), 'null'), '^"|"$', ''), 'true'), 0))) - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_with_hogql_filter.2 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT replaceRegexpAll(JSONExtractRaw(properties, '$feature_flag_response'), '^"|"$', '') AS value, - count(*) as count - FROM events e - WHERE team_id = 99999 - AND event = '$feature_flag_called' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag_response'), '^"|"$', ''))) - AND (has(['a-b-test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag'), '^"|"$', '')))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_with_hogql_filter.3 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2020-01-06 00:00:00', 'UTC')) - toIntervalDay(number) as day_start - FROM numbers(6) - UNION ALL SELECT toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['control', 'test'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT count(DISTINCT person_id) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - breakdown_value - FROM - (SELECT person_id, - min(timestamp) as timestamp, - 
breakdown_value - FROM - (SELECT pdi.person_id as person_id, timestamp, transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, '$feature_flag_response'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['control', 'test']), (['control', 'test']), '$$_posthog_breakdown_other_$$') as breakdown_value - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) as pdi ON events.distinct_id = pdi.distinct_id - WHERE e.team_id = 99999 - AND event = '$feature_flag_called' - AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag_response'), '^"|"$', ''))) - AND (has(['a-b-test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag'), '^"|"$', '')))) - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') ) - GROUP BY person_id, - breakdown_value) AS pdi - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value - ''' -# --- -# name: ClickhouseTestTrendExperimentResults.test_experiment_flow_with_event_results_with_hogql_filter.4 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2020-01-06 00:00:00', 'UTC')) - toIntervalDay(number) as day_start - FROM numbers(6) - UNION ALL SELECT toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['control', 'test'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT count(DISTINCT person_id) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - breakdown_value - FROM - (SELECT person_id, - min(timestamp) as timestamp, - breakdown_value - FROM - (SELECT pdi.person_id as person_id, timestamp, transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, '$feature_flag_response'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['control', 'test']), (['control', 'test']), '$$_posthog_breakdown_other_$$') as breakdown_value - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) as pdi ON events.distinct_id = pdi.distinct_id - WHERE e.team_id = 99999 - AND event = '$feature_flag_called' - AND ((has(['control', 'test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag_response'), '^"|"$', ''))) - AND (has(['a-b-test'], replaceRegexpAll(JSONExtractRaw(e.properties, '$feature_flag'), '^"|"$', '')))) - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') ) - GROUP BY person_id, - breakdown_value) AS pdi - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value - ''' -# --- -# name: 
TestExperimentAuxiliaryEndpoints.test_create_exposure_cohort_for_experiment_with_custom_action_filters_exposure - ''' - /* cohort_calculation: */ - INSERT INTO cohortpeople - SELECT id, - 99999 as cohort_id, - 99999 as team_id, - 1 AS sign, - 1 AS version - FROM - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND ((has(['http://example.com'], replaceRegexpAll(JSONExtractRaw(properties, '$pageview'), '^"|"$', '')))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND ((has(['http://example.com'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$pageview'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) as person - UNION ALL - SELECT person_id, - cohort_id, - team_id, - -1, - version - FROM cohortpeople - WHERE team_id = 99999 - AND cohort_id = 99999 - AND version < 1 - AND sign = 1 SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- -# name: TestExperimentAuxiliaryEndpoints.test_create_exposure_cohort_for_experiment_with_custom_action_filters_exposure.1 - ''' - /* user_id:0 cohort_calculation:posthog.tasks.calculate_cohort.calculate_cohort_ch */ - INSERT INTO cohortpeople - SELECT id, - 99999 as cohort_id, - 99999 as team_id, - 1 AS sign, - 1 AS version - FROM - (SELECT behavior_query.person_id AS id - FROM - (SELECT pdi.person_id AS person_id, - countIf(timestamp > 'explicit_timestamp' - AND timestamp < now() - AND ((event = 'insight viewed' - AND (has(['RETENTION'], replaceRegexpAll(JSONExtractRaw(properties, 'insight'), '^"|"$', '')) - AND distinct_id IN - (SELECT distinct_id - FROM - (SELECT distinct_id, argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) - WHERE person_id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND ((has(['http://example.com'], replaceRegexpAll(JSONExtractRaw(properties, '$pageview'), '^"|"$', '')))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND ((has(['http://example.com'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), '$pageview'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) ))) - OR (event = 'insight viewed' - AND (toFloat64OrNull(replaceRegexpAll(replaceRegexpAll(replaceRegexpAll(JSONExtractRaw(properties, 'filters_count'), '^"|"$', ''), ' ', ''), '^"|"$', '')) > '1')) - OR (match(replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', ''), '/123') - AND event = '$autocapture')) - AND (has(['bonk'], replaceRegexpAll(JSONExtractRaw(properties, 'bonk'), '^"|"$', '')) - AND ifNull(in(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), tuple('x', 'y')), 0))) > 0 AS performed_event_condition_X_level_level_0_level_0_0 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['insight viewed', 'insight viewed', '$autocapture'] - AND timestamp <= now() - AND timestamp >= now() - INTERVAL 6 day - GROUP BY person_id) behavior_query - WHERE 1 = 1 - AND (((performed_event_condition_X_level_level_0_level_0_0))) SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' ) as person - UNION ALL - SELECT person_id, - 
cohort_id, - team_id, - -1, - version - FROM cohortpeople - WHERE team_id = 99999 - AND cohort_id = 99999 - AND version < 1 - AND sign = 1 SETTINGS optimize_aggregation_in_order = 1, - join_algorithm = 'auto' - ''' -# --- diff --git a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_groups.ambr b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_groups.ambr deleted file mode 100644 index b9bfce22b4..0000000000 --- a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_groups.ambr +++ /dev/null @@ -1,90 +0,0 @@ -# serializer version: 1 -# name: ClickhouseTestGroupsApi.test_related_groups - ''' - /* user_id:0 request:_snapshot_ */ - SELECT DISTINCT pdi.person_id - FROM events e - JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) pdi on e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND timestamp > '2021-02-09T00:00:00.000000' - AND timestamp < '2021-05-10T00:00:00.000000' - AND $group_0 = '0::0' - ''' -# --- -# name: ClickhouseTestGroupsApi.test_related_groups.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT DISTINCT $group_1 AS group_key - FROM events e - JOIN - (SELECT group_key - FROM groups - WHERE team_id = 99999 - AND group_type_index = 1 - GROUP BY group_key) groups ON $group_1 = groups.group_key - WHERE team_id = 99999 - AND timestamp > '2021-02-09T00:00:00.000000' - AND timestamp < '2021-05-10T00:00:00.000000' - AND group_key != '' - AND $group_0 = '0::0' - ORDER BY group_key - ''' -# --- -# name: ClickhouseTestGroupsApi.test_related_groups_person - ''' - /* user_id:0 request:_snapshot_ */ - SELECT DISTINCT $group_0 AS group_key - FROM events e - JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) pdi on e.distinct_id = pdi.distinct_id - JOIN - (SELECT group_key - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups ON $group_0 = groups.group_key - WHERE team_id = 99999 - AND timestamp > '2021-02-09T00:00:00.000000' - AND timestamp < '2021-05-10T00:00:00.000000' - AND group_key != '' - AND pdi.person_id = '01795392-cc00-0003-7dc7-67a694604d72' - ORDER BY group_key - ''' -# --- -# name: ClickhouseTestGroupsApi.test_related_groups_person.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT DISTINCT $group_1 AS group_key - FROM events e - JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) pdi on e.distinct_id = pdi.distinct_id - JOIN - (SELECT group_key - FROM groups - WHERE team_id = 99999 - AND group_type_index = 1 - GROUP BY group_key) groups ON $group_1 = groups.group_key - WHERE team_id = 99999 - AND timestamp > '2021-02-09T00:00:00.000000' - AND timestamp < '2021-05-10T00:00:00.000000' - AND group_key != '' - AND pdi.person_id = '01795392-cc00-0003-7dc7-67a694604d72' - ORDER BY group_key - ''' -# --- diff --git a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_retention.ambr b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_retention.ambr deleted file mode 100644 index 109942e765..0000000000 --- a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_retention.ambr +++ /dev/null @@ -1,493 +0,0 @@ -# serializer version: 1 -# name: 
RetentionTests.test_retention_aggregation_by_distinct_id_and_retrieve_people - ''' - /* user_id:0 request:_snapshot_ */ WITH actor_query AS - (WITH 'Day' as period, - NULL as breakdown_values_filter, - NULL as selected_interval, - returning_event_query as - (SELECT toStartOfDay(toTimeZone(toDateTime(e.timestamp, 'UTC'), 'UTC')) AS event_date, - e.distinct_id AS target - FROM events e - WHERE team_id = 99999 - AND e.event = 'target event' - AND toDateTime(e.timestamp) >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toDateTime(e.timestamp) <= toDateTime('2020-01-04 00:00:00', 'UTC') - GROUP BY target, - event_date), - target_event_query as - (SELECT min(toStartOfDay(toTimeZone(toDateTime(e.timestamp, 'UTC'), 'UTC'))) as event_date, - e.distinct_id AS target, - [ - dateDiff( - 'Day', - toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), - toStartOfDay(toTimeZone(toDateTime(min(e.timestamp), 'UTC'), 'UTC')) - ) - ] as breakdown_values - FROM events e - WHERE team_id = 99999 - AND e.event = 'target event' - GROUP BY target - HAVING event_date >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND event_date <= toDateTime('2020-01-04 00:00:00', 'UTC')) SELECT DISTINCT breakdown_values, - intervals_from_base, - actor_id - FROM - (SELECT target_event.breakdown_values AS breakdown_values, - datediff(period, target_event.event_date, returning_event.event_date) AS intervals_from_base, - returning_event.target AS actor_id - FROM target_event_query AS target_event - JOIN returning_event_query AS returning_event ON returning_event.target = target_event.target - WHERE returning_event.event_date > target_event.event_date - UNION ALL SELECT target_event.breakdown_values AS breakdown_values, - 0 AS intervals_from_base, - target_event.target AS actor_id - FROM target_event_query AS target_event) - WHERE (breakdown_values_filter is NULL - OR breakdown_values = breakdown_values_filter) - AND (selected_interval is NULL - OR intervals_from_base = selected_interval) ) - SELECT actor_activity.breakdown_values AS breakdown_values, - actor_activity.intervals_from_base AS intervals_from_base, - COUNT(DISTINCT actor_activity.actor_id) AS count - FROM actor_query AS actor_activity - GROUP BY breakdown_values, - intervals_from_base - ORDER BY breakdown_values, - intervals_from_base - ''' -# --- -# name: RetentionTests.test_retention_aggregation_by_distinct_id_and_retrieve_people.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT actor_id, - groupArray(actor_activity.intervals_from_base) AS appearances - FROM - (WITH 'Day' as period, - [0] as breakdown_values_filter, - NULL as selected_interval, - returning_event_query as - (SELECT toStartOfDay(toTimeZone(toDateTime(e.timestamp, 'UTC'), 'UTC')) AS event_date, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as target - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND e.event = 'target event' - AND toDateTime(e.timestamp) >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toDateTime(e.timestamp) <= toDateTime('2020-01-04 00:00:00', 'UTC') - GROUP BY target, - event_date), - target_event_query as - (SELECT min(toStartOfDay(toTimeZone(toDateTime(e.timestamp, 'UTC'), 'UTC'))) as event_date, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as target, - [ - dateDiff( - 'Day', - toStartOfDay(toDateTime('2020-01-01 
00:00:00', 'UTC')), - toStartOfDay(toTimeZone(toDateTime(min(e.timestamp), 'UTC'), 'UTC')) - ) - ] as breakdown_values - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND e.event = 'target event' - GROUP BY target - HAVING event_date >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND event_date <= toDateTime('2020-01-04 00:00:00', 'UTC')) SELECT DISTINCT breakdown_values, - intervals_from_base, - actor_id - FROM - (SELECT target_event.breakdown_values AS breakdown_values, - datediff(period, target_event.event_date, returning_event.event_date) AS intervals_from_base, - returning_event.target AS actor_id - FROM target_event_query AS target_event - JOIN returning_event_query AS returning_event ON returning_event.target = target_event.target - WHERE returning_event.event_date > target_event.event_date - UNION ALL SELECT target_event.breakdown_values AS breakdown_values, - 0 AS intervals_from_base, - target_event.target AS actor_id - FROM target_event_query AS target_event) - WHERE (breakdown_values_filter is NULL - OR breakdown_values = breakdown_values_filter) - AND (selected_interval is NULL - OR intervals_from_base = selected_interval) ) AS actor_activity - GROUP BY actor_id - ORDER BY length(appearances) DESC, actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: RetentionTests.test_retention_aggregation_by_distinct_id_and_retrieve_people.2 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT actor_id, - groupArray(actor_activity.intervals_from_base) AS appearances - FROM - (WITH 'Day' as period, - [1] as breakdown_values_filter, - NULL as selected_interval, - returning_event_query as - (SELECT toStartOfDay(toTimeZone(toDateTime(e.timestamp, 'UTC'), 'UTC')) AS event_date, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as target - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND e.event = 'target event' - AND toDateTime(e.timestamp) >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toDateTime(e.timestamp) <= toDateTime('2020-01-04 00:00:00', 'UTC') - GROUP BY target, - event_date), - target_event_query as - (SELECT min(toStartOfDay(toTimeZone(toDateTime(e.timestamp, 'UTC'), 'UTC'))) as event_date, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as target, - [ - dateDiff( - 'Day', - toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), - toStartOfDay(toTimeZone(toDateTime(min(e.timestamp), 'UTC'), 'UTC')) - ) - ] as breakdown_values - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND e.event = 'target event' - GROUP BY target - HAVING event_date >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND event_date <= toDateTime('2020-01-04 00:00:00', 'UTC')) SELECT DISTINCT breakdown_values, - intervals_from_base, - actor_id - FROM - (SELECT target_event.breakdown_values AS breakdown_values, - datediff(period, target_event.event_date, returning_event.event_date) AS 
intervals_from_base, - returning_event.target AS actor_id - FROM target_event_query AS target_event - JOIN returning_event_query AS returning_event ON returning_event.target = target_event.target - WHERE returning_event.event_date > target_event.event_date - UNION ALL SELECT target_event.breakdown_values AS breakdown_values, - 0 AS intervals_from_base, - target_event.target AS actor_id - FROM target_event_query AS target_event) - WHERE (breakdown_values_filter is NULL - OR breakdown_values = breakdown_values_filter) - AND (selected_interval is NULL - OR intervals_from_base = selected_interval) ) AS actor_activity - GROUP BY actor_id - ORDER BY length(appearances) DESC, actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: RetentionTests.test_retention_test_account_filters - ''' - /* user_id:0 request:_snapshot_ */ WITH actor_query AS - (WITH 'Day' as period, - NULL as breakdown_values_filter, - NULL as selected_interval, - returning_event_query as - (SELECT toStartOfDay(toTimeZone(toDateTime(e.timestamp, 'UTC'), 'UTC')) AS event_date, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as target - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (NOT (replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%posthog.com%')) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (NOT (replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%posthog.com%')) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND e.event = 'target event' - AND toDateTime(e.timestamp) >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toDateTime(e.timestamp) <= toDateTime('2020-01-03 00:00:00', 'UTC') - GROUP BY target, - event_date), - target_event_query as - (SELECT min(toStartOfDay(toTimeZone(toDateTime(e.timestamp, 'UTC'), 'UTC'))) as event_date, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as target, - [ - dateDiff( - 'Day', - toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), - toStartOfDay(toTimeZone(toDateTime(min(e.timestamp), 'UTC'), 'UTC')) - ) - ] as breakdown_values - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (NOT (replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%posthog.com%')) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (NOT (replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%posthog.com%')) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND e.event = 'target event' - GROUP BY target - HAVING event_date >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND event_date <= toDateTime('2020-01-03 00:00:00', 'UTC')) SELECT DISTINCT breakdown_values, - intervals_from_base, - actor_id - FROM - (SELECT target_event.breakdown_values AS breakdown_values, - 
datediff(period, target_event.event_date, returning_event.event_date) AS intervals_from_base, - returning_event.target AS actor_id - FROM target_event_query AS target_event - JOIN returning_event_query AS returning_event ON returning_event.target = target_event.target - WHERE returning_event.event_date > target_event.event_date - UNION ALL SELECT target_event.breakdown_values AS breakdown_values, - 0 AS intervals_from_base, - target_event.target AS actor_id - FROM target_event_query AS target_event) - WHERE (breakdown_values_filter is NULL - OR breakdown_values = breakdown_values_filter) - AND (selected_interval is NULL - OR intervals_from_base = selected_interval) ) - SELECT actor_activity.breakdown_values AS breakdown_values, - actor_activity.intervals_from_base AS intervals_from_base, - COUNT(DISTINCT actor_activity.actor_id) AS count - FROM actor_query AS actor_activity - GROUP BY breakdown_values, - intervals_from_base - ORDER BY breakdown_values, - intervals_from_base - ''' -# --- -# name: RetentionTests.test_retention_test_account_filters.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT actor_id, - groupArray(actor_activity.intervals_from_base) AS appearances - FROM - (WITH 'Day' as period, - [0] as breakdown_values_filter, - NULL as selected_interval, - returning_event_query as - (SELECT toStartOfDay(toTimeZone(toDateTime(e.timestamp, 'UTC'), 'UTC')) AS event_date, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as target - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (NOT (replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%posthog.com%')) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (NOT (replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%posthog.com%')) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND e.event = 'target event' - AND toDateTime(e.timestamp) >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toDateTime(e.timestamp) <= toDateTime('2020-01-03 00:00:00', 'UTC') - GROUP BY target, - event_date), - target_event_query as - (SELECT min(toStartOfDay(toTimeZone(toDateTime(e.timestamp, 'UTC'), 'UTC'))) as event_date, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as target, - [ - dateDiff( - 'Day', - toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), - toStartOfDay(toTimeZone(toDateTime(min(e.timestamp), 'UTC'), 'UTC')) - ) - ] as breakdown_values - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (NOT (replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%posthog.com%')) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (NOT (replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%posthog.com%')) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = 
pdi.person_id - WHERE team_id = 99999 - AND e.event = 'target event' - GROUP BY target - HAVING event_date >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND event_date <= toDateTime('2020-01-03 00:00:00', 'UTC')) SELECT DISTINCT breakdown_values, - intervals_from_base, - actor_id - FROM - (SELECT target_event.breakdown_values AS breakdown_values, - datediff(period, target_event.event_date, returning_event.event_date) AS intervals_from_base, - returning_event.target AS actor_id - FROM target_event_query AS target_event - JOIN returning_event_query AS returning_event ON returning_event.target = target_event.target - WHERE returning_event.event_date > target_event.event_date - UNION ALL SELECT target_event.breakdown_values AS breakdown_values, - 0 AS intervals_from_base, - target_event.target AS actor_id - FROM target_event_query AS target_event) - WHERE (breakdown_values_filter is NULL - OR breakdown_values = breakdown_values_filter) - AND (selected_interval is NULL - OR intervals_from_base = selected_interval) ) AS actor_activity - GROUP BY actor_id - ORDER BY length(appearances) DESC, actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: RetentionTests.test_retention_test_account_filters.2 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT actor_id, - groupArray(actor_activity.intervals_from_base) AS appearances - FROM - (WITH 'Day' as period, - [1] as breakdown_values_filter, - NULL as selected_interval, - returning_event_query as - (SELECT toStartOfDay(toTimeZone(toDateTime(e.timestamp, 'UTC'), 'UTC')) AS event_date, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as target - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (NOT (replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%posthog.com%')) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (NOT (replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%posthog.com%')) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND e.event = 'target event' - AND toDateTime(e.timestamp) >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toDateTime(e.timestamp) <= toDateTime('2020-01-03 00:00:00', 'UTC') - GROUP BY target, - event_date), - target_event_query as - (SELECT min(toStartOfDay(toTimeZone(toDateTime(e.timestamp, 'UTC'), 'UTC'))) as event_date, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as target, - [ - dateDiff( - 'Day', - toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), - toStartOfDay(toTimeZone(toDateTime(min(e.timestamp), 'UTC'), 'UTC')) - ) - ] as breakdown_values - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (NOT (replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%posthog.com%')) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (NOT 
(replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%posthog.com%')) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND e.event = 'target event' - GROUP BY target - HAVING event_date >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND event_date <= toDateTime('2020-01-03 00:00:00', 'UTC')) SELECT DISTINCT breakdown_values, - intervals_from_base, - actor_id - FROM - (SELECT target_event.breakdown_values AS breakdown_values, - datediff(period, target_event.event_date, returning_event.event_date) AS intervals_from_base, - returning_event.target AS actor_id - FROM target_event_query AS target_event - JOIN returning_event_query AS returning_event ON returning_event.target = target_event.target - WHERE returning_event.event_date > target_event.event_date - UNION ALL SELECT target_event.breakdown_values AS breakdown_values, - 0 AS intervals_from_base, - target_event.target AS actor_id - FROM target_event_query AS target_event) - WHERE (breakdown_values_filter is NULL - OR breakdown_values = breakdown_values_filter) - AND (selected_interval is NULL - OR intervals_from_base = selected_interval) ) AS actor_activity - GROUP BY actor_id - ORDER BY length(appearances) DESC, actor_id - LIMIT 100 - OFFSET 0 - ''' -# --- diff --git a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_stickiness.ambr b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_stickiness.ambr deleted file mode 100644 index 0d92ce9f57..0000000000 --- a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_stickiness.ambr +++ /dev/null @@ -1,807 +0,0 @@ -# serializer version: 1 -# name: TestClickhouseStickiness.test_aggregate_by_groups - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(num_actors) AS counts, - groupArray(num_intervals) AS intervals - FROM - (SELECT sum(num_actors) AS num_actors, - num_intervals AS num_intervals - FROM - (SELECT 0 AS num_actors, - plus(numbers.number, 1) AS num_intervals - FROM numbers(dateDiff('week', toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), 0), plus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-02-15 23:59:59', 6, 'UTC')), 0), toIntervalWeek(1)))) AS numbers - UNION ALL SELECT count(DISTINCT aggregation_target) AS num_actors, - num_intervals AS num_intervals - FROM - (SELECT aggregation_target AS aggregation_target, - count() AS num_intervals - FROM - (SELECT e.`$group_0` AS aggregation_target, - toStartOfWeek(toTimeZone(e.timestamp, 'UTC'), 0) AS start_of_interval - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-02-15 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(notEquals(nullIf(nullIf(e.`$group_0`, ''), 'null'), ''), 1), notEquals(e.`$group_0`, '')) - GROUP BY aggregation_target, - start_of_interval - HAVING ifNull(greater(count(), 0), 0)) - GROUP BY aggregation_target) - GROUP BY num_intervals - ORDER BY num_intervals ASC) - GROUP BY num_intervals - ORDER BY num_intervals ASC) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# 
name: TestClickhouseStickiness.test_aggregate_by_groups.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT DISTINCT aggregation_target AS actor_id - FROM - (SELECT e."$group_0" AS aggregation_target, - countDistinct(toStartOfWeek(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'), 0)) as num_intervals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = 'watched movie' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfWeek(toDateTime('2020-01-01 00:00:00', 'UTC'), 0), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-02-15 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfWeek(toDateTime('2020-01-01 00:00:00', 'UTC'), 0), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-02-15 23:59:59', 'UTC') - AND event = 'watched movie' - AND (NOT has([''], "$group_0") - AND NOT has([''], "$group_0")) - GROUP BY aggregation_target) - WHERE num_intervals = 1 - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseStickiness.test_aggregate_by_groups.2 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT DISTINCT aggregation_target AS actor_id - FROM - (SELECT e."$group_0" AS aggregation_target, - countDistinct(toStartOfWeek(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'), 0)) as num_intervals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = 'watched movie' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfWeek(toDateTime('2020-01-01 00:00:00', 'UTC'), 0), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-02-15 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfWeek(toDateTime('2020-01-01 00:00:00', 'UTC'), 0), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-02-15 23:59:59', 'UTC') - AND event = 'watched movie' - AND (NOT has([''], "$group_0") - AND NOT has([''], "$group_0")) - GROUP BY aggregation_target) - WHERE num_intervals = 2 - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseStickiness.test_aggregate_by_groups.3 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT DISTINCT aggregation_target AS actor_id - FROM - (SELECT e."$group_0" AS aggregation_target, - countDistinct(toStartOfWeek(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'), 0)) as num_intervals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = 'watched movie' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfWeek(toDateTime('2020-01-01 00:00:00', 'UTC'), 0), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-02-15 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfWeek(toDateTime('2020-01-01 00:00:00', 'UTC'), 
0), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-02-15 23:59:59', 'UTC') - AND event = 'watched movie' - AND (NOT has([''], "$group_0") - AND NOT has([''], "$group_0")) - GROUP BY aggregation_target) - WHERE num_intervals = 3 - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseStickiness.test_compare - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(num_actors) AS counts, - groupArray(num_intervals) AS intervals - FROM - (SELECT sum(num_actors) AS num_actors, - num_intervals AS num_intervals - FROM - (SELECT 0 AS num_actors, - plus(numbers.number, 1) AS num_intervals - FROM numbers(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), toIntervalDay(1)))) AS numbers - UNION ALL SELECT count(DISTINCT aggregation_target) AS num_actors, - num_intervals AS num_intervals - FROM - (SELECT aggregation_target AS aggregation_target, - count() AS num_intervals - FROM - (SELECT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS start_of_interval - FROM events AS e SAMPLE 1 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie')) - GROUP BY aggregation_target, - start_of_interval - HAVING ifNull(greater(count(), 0), 0)) - GROUP BY aggregation_target) - GROUP BY num_intervals - ORDER BY num_intervals ASC) - GROUP BY num_intervals - ORDER BY num_intervals ASC) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: TestClickhouseStickiness.test_compare.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(num_actors) AS counts, - groupArray(num_intervals) AS intervals - FROM - (SELECT sum(num_actors) AS num_actors, - num_intervals AS num_intervals - FROM - (SELECT 0 AS num_actors, - plus(numbers.number, 1) AS num_intervals - FROM numbers(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC'))), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), toIntervalDay(1)))) AS numbers - UNION ALL SELECT count(DISTINCT aggregation_target) AS num_actors, - num_intervals AS num_intervals - FROM - (SELECT aggregation_target AS aggregation_target, - count() AS num_intervals - FROM - (SELECT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, - 
toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS start_of_interval - FROM events AS e SAMPLE 1 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie')) - GROUP BY aggregation_target, - start_of_interval - HAVING ifNull(greater(count(), 0), 0)) - GROUP BY aggregation_target) - GROUP BY num_intervals - ORDER BY num_intervals ASC) - GROUP BY num_intervals - ORDER BY num_intervals ASC) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: TestClickhouseStickiness.test_filter_by_group_properties - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(num_actors) AS counts, - groupArray(num_intervals) AS intervals - FROM - (SELECT sum(num_actors) AS num_actors, - num_intervals AS num_intervals - FROM - (SELECT 0 AS num_actors, - plus(numbers.number, 1) AS num_intervals - FROM numbers(dateDiff('week', toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), 0), plus(toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-02-15 23:59:59', 6, 'UTC')), 0), toIntervalWeek(1)))) AS numbers - UNION ALL SELECT count(DISTINCT aggregation_target) AS num_actors, - num_intervals AS num_intervals - FROM - (SELECT aggregation_target AS aggregation_target, - count() AS num_intervals - FROM - (SELECT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, - toStartOfWeek(toTimeZone(e.timestamp, 'UTC'), 0) AS start_of_interval - FROM events AS e SAMPLE 1 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 99999), equals(index, 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 99999), 
greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-02-15 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(e__group_0.properties___industry, 'technology'), 0)) - GROUP BY aggregation_target, - start_of_interval - HAVING ifNull(greater(count(), 0), 0)) - GROUP BY aggregation_target) - GROUP BY num_intervals - ORDER BY num_intervals ASC) - GROUP BY num_intervals - ORDER BY num_intervals ASC) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: TestClickhouseStickiness.test_filter_by_group_properties.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT DISTINCT aggregation_target AS actor_id - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target, - countDistinct(toStartOfWeek(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'), 0)) as num_intervals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = 'watched movie' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfWeek(toDateTime('2020-01-01 00:00:00', 'UTC'), 0), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-02-15 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfWeek(toDateTime('2020-01-01 00:00:00', 'UTC'), 0), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-02-15 23:59:59', 'UTC') - AND event = 'watched movie' - AND (has(['technology'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - GROUP BY aggregation_target) - WHERE num_intervals = 1 - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseStickiness.test_filter_by_group_properties.2 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT DISTINCT aggregation_target AS actor_id - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target, - countDistinct(toStartOfWeek(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'), 0)) as num_intervals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = 'watched movie' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfWeek(toDateTime('2020-01-01 00:00:00', 'UTC'), 0), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-02-15 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY 
group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfWeek(toDateTime('2020-01-01 00:00:00', 'UTC'), 0), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-02-15 23:59:59', 'UTC') - AND event = 'watched movie' - AND (has(['technology'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - GROUP BY aggregation_target) - WHERE num_intervals = 2 - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseStickiness.test_filter_by_group_properties.3 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT DISTINCT aggregation_target AS actor_id - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target, - countDistinct(toStartOfWeek(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'), 0)) as num_intervals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = 'watched movie' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfWeek(toDateTime('2020-01-01 00:00:00', 'UTC'), 0), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-02-15 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfWeek(toDateTime('2020-01-01 00:00:00', 'UTC'), 0), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-02-15 23:59:59', 'UTC') - AND event = 'watched movie' - AND (has(['technology'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - GROUP BY aggregation_target) - WHERE num_intervals = 3 - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseStickiness.test_stickiness_all_time - ''' - /* user_id:0 request:_snapshot_ */ - SELECT timestamp - from events - WHERE team_id = 99999 - AND timestamp > '2015-01-01' - order by timestamp - limit 1 - ''' -# --- -# name: TestClickhouseStickiness.test_stickiness_all_time.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(num_actors) AS counts, - groupArray(num_intervals) AS intervals - FROM - (SELECT sum(num_actors) AS num_actors, - num_intervals AS num_intervals - FROM - (SELECT 0 AS num_actors, - plus(numbers.number, 1) AS num_intervals - FROM numbers(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 12:00:00', 6, 'UTC'))), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), toIntervalDay(1)))) AS numbers - UNION ALL SELECT count(DISTINCT aggregation_target) AS num_actors, - num_intervals AS num_intervals - FROM - (SELECT aggregation_target AS aggregation_target, - count() AS num_intervals - FROM - (SELECT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS start_of_interval - FROM events AS e SAMPLE 1 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM 
person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 12:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie')) - GROUP BY aggregation_target, - start_of_interval - HAVING ifNull(greater(count(), 0), 0)) - GROUP BY aggregation_target) - GROUP BY num_intervals - ORDER BY num_intervals ASC) - GROUP BY num_intervals - ORDER BY num_intervals ASC) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: TestClickhouseStickiness.test_stickiness_all_time.2 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT timestamp - from events - WHERE team_id = 99999 - AND timestamp > '2015-01-01' - order by timestamp - limit 1 - ''' -# --- -# name: TestClickhouseStickiness.test_stickiness_all_time_with_sampling - ''' - /* user_id:0 request:_snapshot_ */ - SELECT timestamp - from events - WHERE team_id = 99999 - AND timestamp > '2015-01-01' - order by timestamp - limit 1 - ''' -# --- -# name: TestClickhouseStickiness.test_stickiness_all_time_with_sampling.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(num_actors) AS counts, - groupArray(num_intervals) AS intervals - FROM - (SELECT sum(num_actors) AS num_actors, - num_intervals AS num_intervals - FROM - (SELECT 0 AS num_actors, - plus(numbers.number, 1) AS num_intervals - FROM numbers(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 12:00:00', 6, 'UTC'))), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), toIntervalDay(1)))) AS numbers - UNION ALL SELECT count(DISTINCT aggregation_target) AS num_actors, - num_intervals AS num_intervals - FROM - (SELECT aggregation_target AS aggregation_target, - count() AS num_intervals - FROM - (SELECT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS start_of_interval - FROM events AS e SAMPLE 1.0 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 12:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 
'UTC'))), equals(e.event, 'watched movie')) - GROUP BY aggregation_target, - start_of_interval - HAVING ifNull(greater(count(), 0), 0)) - GROUP BY aggregation_target) - GROUP BY num_intervals - ORDER BY num_intervals ASC) - GROUP BY num_intervals - ORDER BY num_intervals ASC) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: TestClickhouseStickiness.test_stickiness_all_time_with_sampling.2 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT timestamp - from events - WHERE team_id = 99999 - AND timestamp > '2015-01-01' - order by timestamp - limit 1 - ''' -# --- -# name: TestClickhouseStickiness.test_stickiness_hours - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(num_actors) AS counts, - groupArray(num_intervals) AS intervals - FROM - (SELECT sum(num_actors) AS num_actors, - num_intervals AS num_intervals - FROM - (SELECT 0 AS num_actors, - plus(numbers.number, 1) AS num_intervals - FROM numbers(dateDiff('hour', toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 12:00:00', 6, 'UTC'))), plus(toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 20:00:00', 6, 'UTC'))), toIntervalHour(1)))) AS numbers - UNION ALL SELECT count(DISTINCT aggregation_target) AS num_actors, - num_intervals AS num_intervals - FROM - (SELECT aggregation_target AS aggregation_target, - count() AS num_intervals - FROM - (SELECT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, - toStartOfHour(toTimeZone(e.timestamp, 'UTC')) AS start_of_interval - FROM events AS e SAMPLE 1 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfHour(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 12:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 20:00:00', 6, 'UTC'))), equals(e.event, 'watched movie')) - GROUP BY aggregation_target, - start_of_interval - HAVING ifNull(greater(count(), 0), 0)) - GROUP BY aggregation_target) - GROUP BY num_intervals - ORDER BY num_intervals ASC) - GROUP BY num_intervals - ORDER BY num_intervals ASC) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: TestClickhouseStickiness.test_stickiness_people_endpoint - ''' - /* user_id:0 request:_snapshot_ */ - SELECT DISTINCT aggregation_target AS actor_id - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target, - countDistinct(toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'))) as num_intervals - FROM events e - LEFT OUTER JOIN - (SELECT 
distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND ((event = 'watched movie')) - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND ((event = 'watched movie')) - GROUP BY aggregation_target) - WHERE num_intervals = 1 - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseStickiness.test_stickiness_people_paginated - ''' - /* user_id:0 request:_snapshot_ */ - SELECT DISTINCT aggregation_target AS actor_id - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target, - countDistinct(toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'))) as num_intervals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND ((event = 'watched movie')) - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND ((event = 'watched movie')) - GROUP BY aggregation_target) - WHERE num_intervals = 1 - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: TestClickhouseStickiness.test_stickiness_people_paginated.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT DISTINCT aggregation_target AS actor_id - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target, - countDistinct(toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'))) as num_intervals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND ((event = 'watched movie')) - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2020-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND ((event = 'watched movie')) - GROUP BY aggregation_target) - WHERE num_intervals = 1 - LIMIT 100 - OFFSET 100 - ''' -# --- -# name: TestClickhouseStickiness.test_stickiness_with_person_on_events_v2 - ''' - - SELECT DISTINCT person_id - FROM events - WHERE team_id = 99999 - AND distinct_id 
= 'person2' - ''' -# --- -# name: TestClickhouseStickiness.test_stickiness_with_person_on_events_v2.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(num_actors) AS counts, - groupArray(num_intervals) AS intervals - FROM - (SELECT sum(num_actors) AS num_actors, - num_intervals AS num_intervals - FROM - (SELECT 0 AS num_actors, - plus(numbers.number, 1) AS num_intervals - FROM numbers(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), toIntervalDay(1)))) AS numbers - UNION ALL SELECT count(DISTINCT aggregation_target) AS num_actors, - num_intervals AS num_intervals - FROM - (SELECT aggregation_target AS aggregation_target, - count() AS num_intervals - FROM - (SELECT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS start_of_interval - FROM events AS e SAMPLE 1 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie')) - GROUP BY aggregation_target, - start_of_interval - HAVING ifNull(greater(count(), 0), 0)) - GROUP BY aggregation_target) - GROUP BY num_intervals - ORDER BY num_intervals ASC) - GROUP BY num_intervals - ORDER BY num_intervals ASC) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: TestClickhouseStickiness.test_timezones - ''' - - SELECT countDistinct(aggregation_target), - num_intervals - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target, - countDistinct(toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'))) as num_intervals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2021-05-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-15 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2021-05-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-15 23:59:59', 'UTC') - AND event = '$pageview' - GROUP BY 
aggregation_target) - WHERE num_intervals <= 16 - GROUP BY num_intervals - ORDER BY num_intervals - ''' -# --- -# name: TestClickhouseStickiness.test_timezones.1 - ''' - - SELECT countDistinct(aggregation_target), - num_intervals - FROM - (SELECT if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) AS aggregation_target, - countDistinct(toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'US/Pacific'))) as num_intervals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'US/Pacific') >= toDateTime(toStartOfDay(toDateTime('2021-05-01 00:00:00', 'US/Pacific')), 'US/Pacific') - AND toTimeZone(timestamp, 'US/Pacific') <= toDateTime('2021-05-15 23:59:59', 'US/Pacific') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND toTimeZone(timestamp, 'US/Pacific') >= toDateTime(toStartOfDay(toDateTime('2021-05-01 00:00:00', 'US/Pacific')), 'US/Pacific') - AND toTimeZone(timestamp, 'US/Pacific') <= toDateTime('2021-05-15 23:59:59', 'US/Pacific') - AND event = '$pageview' - GROUP BY aggregation_target) - WHERE num_intervals <= 16 - GROUP BY num_intervals - ORDER BY num_intervals - ''' -# --- diff --git a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_trends.ambr b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_trends.ambr deleted file mode 100644 index 4d1cee60be..0000000000 --- a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_trends.ambr +++ /dev/null @@ -1,1065 +0,0 @@ -# serializer version: 1 -# name: ClickhouseTestTrends.test_insight_trends_aggregate - ''' - /* user_id:0 request:_snapshot_ */ - SELECT count() AS total - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) - ORDER BY 1 DESC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: ClickhouseTestTrends.test_insight_trends_aggregate.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT person_id AS actor_id, - count() AS actor_value - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - e.distinct_id as distinct_id, - e.team_id as team_id - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 
'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') ) - GROUP BY actor_id - ORDER BY actor_value DESC, - actor_id DESC - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestTrends.test_insight_trends_basic - ''' - /* user_id:0 request:_snapshot_ */ - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total - FROM - (SELECT sum(total) AS count, - day_start AS day_start - FROM - (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start - FROM events AS e SAMPLE 1 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) - ORDER BY arraySum(total) DESC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: ClickhouseTestTrends.test_insight_trends_basic.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT person_id AS actor_id, - count() AS actor_value - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - e.distinct_id as distinct_id, - e.team_id as team_id - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2012-01-14 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-14 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2012-01-14 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-14 
23:59:59', 'UTC') ) - GROUP BY actor_id - ORDER BY actor_value DESC, - actor_id DESC - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestTrends.test_insight_trends_clean_arg - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total - FROM - (SELECT SUM(total) AS count, - day_start - FROM - (SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2012-01-15 23:59:59', 'UTC')) - toIntervalDay(number) AS day_start - FROM numbers(dateDiff('day', toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), toDateTime('2012-01-15 23:59:59', 'UTC'))) - UNION ALL SELECT toUInt16(0) AS total, - toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')) - UNION ALL SELECT count(*) AS total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS date - FROM events e - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') - AND (has(['val'], replaceRegexpAll(JSONExtractRaw(e.properties, 'key'), '^"|"$', ''))) - GROUP BY date) - GROUP BY day_start - ORDER BY day_start) - ''' -# --- -# name: ClickhouseTestTrends.test_insight_trends_clean_arg.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT person_id AS actor_id, - count() AS actor_value - FROM - (SELECT e.timestamp as timestamp, - e."properties" as "properties", - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - e.distinct_id as distinct_id, - e.team_id as team_id - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2012-01-14 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-14 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2012-01-14 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-14 23:59:59', 'UTC') - AND (has(['val'], replaceRegexpAll(JSONExtractRaw(e.properties, 'key'), '^"|"$', ''))) ) - GROUP BY actor_id - ORDER BY actor_value DESC, - actor_id DESC - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestTrends.test_insight_trends_cumulative - ''' - /* user_id:0 request:_snapshot_ */ - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayFill(x -> ifNull(greater(x, 0), 0), arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date)) AS total - FROM - (SELECT day_start AS day_start, - sum(count) OVER ( - ORDER BY day_start ASC) AS count - FROM - (SELECT sum(total) AS count, - day_start AS day_start - FROM - (SELECT count() AS 
total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) - ORDER BY day_start ASC) - ORDER BY arraySum(total) DESC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: ClickhouseTestTrends.test_insight_trends_cumulative.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayFill(x -> ifNull(greater(x, 0), 0), arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date)) AS total - FROM - (SELECT day_start AS day_start, - sum(count) OVER ( - ORDER BY day_start ASC) AS count - FROM - (SELECT sum(total) AS count, - day_start AS day_start - FROM - (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, - min(toStartOfDay(toTimeZone(e.timestamp, 'UTC'))) AS day_start - FROM events AS e SAMPLE 1 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) - GROUP BY if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) - GROUP BY day_start - ORDER BY day_start ASC) - ORDER BY day_start ASC) - ORDER BY arraySum(total) DESC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: ClickhouseTestTrends.test_insight_trends_cumulative.10 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', '') AS value, - 
count(*) as count - FROM events e - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2012-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestTrends.test_insight_trends_cumulative.11 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2012-01-15 23:59:59', 'UTC')) - toIntervalDay(number) as day_start - FROM numbers(15) - UNION ALL SELECT toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['val', 'notval'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT count(DISTINCT person_id) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - breakdown_value - FROM - (SELECT person_id, - min(timestamp) as timestamp, - breakdown_value - FROM - (SELECT pdi.person_id as person_id, timestamp, transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['val', 'notval']), (['val', 'notval']), '$$_posthog_breakdown_other_$$') as breakdown_value - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) as pdi ON events.distinct_id = pdi.distinct_id - WHERE e.team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') ) - GROUP BY person_id, - breakdown_value) AS pdi - GROUP BY day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value - ''' -# --- -# name: ClickhouseTestTrends.test_insight_trends_cumulative.12 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT person_id AS actor_id, - count() AS actor_value - FROM - (SELECT e.timestamp as timestamp, - e."properties" as "properties", - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - e.distinct_id as distinct_id, - e.team_id as team_id - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-14 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-14 23:59:59', 'UTC') - AND (has(['val'], 
replaceRegexpAll(JSONExtractRaw(e.properties, 'key'), '^"|"$', ''))) ) - GROUP BY actor_id - ORDER BY actor_value DESC, - actor_id DESC - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestTrends.test_insight_trends_cumulative.2 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(1)(date)[1] AS date, - arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value - FROM - (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayFill(x -> ifNull(greater(x, 0), 0), arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date)) AS total, - breakdown_value AS breakdown_value, - rowNumberInAllBlocks() AS row_number - FROM - (SELECT day_start AS day_start, - sum(count) OVER (PARTITION BY breakdown_value - ORDER BY day_start ASC) AS count, - breakdown_value AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count() AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start, - ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) - GROUP BY day_start, - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - ORDER BY day_start ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) - WHERE isNotNull(breakdown_value) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: ClickhouseTestTrends.test_insight_trends_cumulative.3 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(1)(date)[1] AS date, - arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), 
arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value - FROM - (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayFill(x -> ifNull(greater(x, 0), 0), arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date)) AS total, - breakdown_value AS breakdown_value, - rowNumberInAllBlocks() AS row_number - FROM - (SELECT day_start AS day_start, - sum(count) OVER (PARTITION BY breakdown_value - ORDER BY day_start ASC) AS count, - breakdown_value AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT counts AS total, - toStartOfDay(timestamp) AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT d.timestamp AS timestamp, - count(DISTINCT e.actor_id) AS counts, - e.breakdown_value AS breakdown_value - FROM - (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS timestamp - FROM numbers(dateDiff('day', minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(7)), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC')))) AS numbers) AS d - CROSS JOIN - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS actor_id, - ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - LEFT JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'key'), ''), 'null'), '^"|"$', '') AS properties___key - FROM person - WHERE and(equals(person.team_id, 99999), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 99999) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__person ON equals(if(not(empty(e__override.distinct_id)), 
e__override.person_id, e.person_id), e__person.id) - WHERE and(equals(e.team_id, 99999), and(equals(e.event, '$pageview'), ifNull(equals(e__person.properties___key, 'some_val'), 0), ifNull(equals(e__person.properties___key, 'some_val'), 0)), greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC')))) - GROUP BY timestamp, actor_id, - breakdown_value) AS e - WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) - GROUP BY d.timestamp, - breakdown_value - ORDER BY d.timestamp ASC) - WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), 0))) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - ORDER BY day_start ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) - WHERE isNotNull(breakdown_value) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: ClickhouseTestTrends.test_insight_trends_cumulative.4 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(1)(date)[1] AS date, - arrayFold((acc, x) -> arrayMap(i -> plus(acc[i], x[i]), range(1, plus(length(date), 1))), groupArray(ifNull(total, 0)), arrayWithConstant(length(date), reinterpretAsFloat64(0))) AS total, - if(ifNull(ifNull(greaterOrEquals(row_number, 25), 0), 0), '$$_posthog_breakdown_other_$$', breakdown_value) AS breakdown_value - FROM - (SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayFill(x -> ifNull(greater(x, 0), 0), arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date)) AS total, - breakdown_value AS breakdown_value, - rowNumberInAllBlocks() AS row_number - FROM - (SELECT day_start AS day_start, - sum(count) OVER (PARTITION BY breakdown_value - ORDER BY day_start ASC) AS count, - breakdown_value AS breakdown_value - FROM - (SELECT sum(total) AS count, - day_start AS day_start, - breakdown_value AS breakdown_value - FROM - (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, 
e.person_id)) AS total, - min(toStartOfDay(toTimeZone(e.timestamp, 'UTC'))) AS day_start, - ifNull(nullIf(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '')), ''), '$$_posthog_breakdown_null_$$') AS breakdown_value - FROM events AS e SAMPLE 1 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) - GROUP BY if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), - breakdown_value) - GROUP BY day_start, - breakdown_value - ORDER BY day_start ASC, breakdown_value ASC) - ORDER BY day_start ASC) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC) - WHERE isNotNull(breakdown_value) - GROUP BY breakdown_value - ORDER BY if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_other_$$'), 0), 2, if(ifNull(equals(breakdown_value, '$$_posthog_breakdown_null_$$'), 0), 1, 0)) ASC, arraySum(total) DESC, breakdown_value ASC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: ClickhouseTestTrends.test_insight_trends_cumulative.5 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2012-01-15 23:59:59', 'UTC')) - toIntervalDay(number) as day_start - FROM numbers(15) - UNION ALL SELECT toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['val', 'notval'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT count(*) as total, - toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['val', 'notval']), (['val', 'notval']), '$$_posthog_breakdown_other_$$') as breakdown_value - FROM events e - WHERE e.team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') - GROUP BY 
day_start, - breakdown_value)) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value - ''' -# --- -# name: ClickhouseTestTrends.test_insight_trends_cumulative.6 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT person_id AS actor_id, - count() AS actor_value - FROM - (SELECT e.timestamp as timestamp, - e."properties" as "properties", - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - e.distinct_id as distinct_id, - e.team_id as team_id - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-14 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-14 23:59:59', 'UTC') - AND (has(['val'], replaceRegexpAll(JSONExtractRaw(e.properties, 'key'), '^"|"$', ''))) ) - GROUP BY actor_id - ORDER BY actor_value DESC, - actor_id DESC - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestTrends.test_insight_trends_cumulative.7 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', '') AS value, - count(DISTINCT pdi.person_id) as count - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND ((has(['some_val'], replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', ''))) - AND (has(['some_val'], replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', '')))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND ((has(['some_val'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'key'), '^"|"$', ''))) - AND (has(['some_val'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'key'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) person ON pdi.person_id = person.id - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2012-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestTrends.test_insight_trends_cumulative.8 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT groupArray(day_start) as date, - groupArray(count) AS total, - breakdown_value - FROM - (SELECT SUM(total) as count, - day_start, - breakdown_value - FROM - (SELECT * - FROM - (SELECT toUInt16(0) AS total, - ticks.day_start as day_start, - breakdown_value - FROM - (SELECT toStartOfDay(toDateTime('2012-01-15 23:59:59', 'UTC')) - toIntervalDay(number) as day_start - FROM numbers(15) - UNION ALL SELECT toStartOfDay(toDateTime('2012-01-01 00:00:00', 
'UTC')) as day_start) as ticks - CROSS JOIN - (SELECT breakdown_value - FROM - (SELECT ['val', 'notval'] as breakdown_value) ARRAY - JOIN breakdown_value) as sec - ORDER BY breakdown_value, - day_start - UNION ALL SELECT counts AS total, - timestamp AS day_start, - breakdown_value - FROM - (SELECT d.timestamp, - COUNT(DISTINCT person_id) counts, - breakdown_value - FROM - (SELECT toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS timestamp - FROM events e - WHERE team_id = 99999 - AND toDateTime(timestamp, 'UTC') >= toDateTime('2011-12-25 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') - GROUP BY timestamp) d - CROSS JOIN - (SELECT toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) AS timestamp, - pdi.person_id AS person_id, - transform(ifNull(nullIf(replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', ''), ''), '$$_posthog_breakdown_null_$$'), (['val', 'notval']), (['val', 'notval']), '$$_posthog_breakdown_other_$$') AS breakdown_value - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) as pdi ON events.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND ((has(['some_val'], replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', ''))) - AND (has(['some_val'], replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', '')))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND ((has(['some_val'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'key'), '^"|"$', ''))) - AND (has(['some_val'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'key'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE e.team_id = 99999 - AND event = '$pageview' - AND toDateTime(timestamp, 'UTC') >= toDateTime('2011-12-25 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') - GROUP BY timestamp, person_id, - breakdown_value) e - WHERE e.timestamp <= d.timestamp - AND e.timestamp > d.timestamp - INTERVAL 6 DAY - GROUP BY d.timestamp, - breakdown_value - ORDER BY d.timestamp) - WHERE 11111 = 11111 - AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-01 00:00:00', 'UTC')), 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') )) - GROUP BY day_start, - breakdown_value - ORDER BY breakdown_value, - day_start) - GROUP BY breakdown_value - ORDER BY breakdown_value - ''' -# --- -# name: ClickhouseTestTrends.test_insight_trends_cumulative.9 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT person_id AS actor_id, - count() AS actor_value - FROM - (SELECT e.timestamp as timestamp, - e."properties" as "properties", - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - e.distinct_id as distinct_id, - e.team_id as team_id - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = '$pageview' - AND toDateTime(timestamp, 'UTC') >= toDateTime('2011-12-25 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-14 23:59:59', 'UTC')) - GROUP BY distinct_id - 
HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 99999 - AND id IN - (SELECT id - FROM person - WHERE team_id = 99999 - AND (((has(['some_val'], replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', '')))) - AND (has(['some_val'], replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', '')))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (((has(['some_val'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'key'), '^"|"$', '')))) - AND (has(['some_val'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'key'), '^"|"$', '')))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 99999 - AND event = '$pageview' - AND toDateTime(timestamp, 'UTC') >= toDateTime('2011-12-25 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-14 23:59:59', 'UTC') - AND (((has(['val'], replaceRegexpAll(JSONExtractRaw(e.properties, 'key'), '^"|"$', ''))))) ) - GROUP BY actor_id - ORDER BY actor_value DESC, - actor_id DESC - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestTrendsCaching.test_insight_trends_merging - ''' - /* user_id:0 request:_snapshot_ */ - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total - FROM - (SELECT sum(total) AS count, - day_start AS day_start - FROM - (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start - FROM events AS e SAMPLE 1 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) - ORDER BY arraySum(total) DESC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: ClickhouseTestTrendsCaching.test_insight_trends_merging.1 - ''' 
- /* user_id:0 request:_snapshot_ */ - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total - FROM - (SELECT sum(total) AS count, - day_start AS day_start - FROM - (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start - FROM events AS e SAMPLE 1 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-15 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) - ORDER BY arraySum(total) DESC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: ClickhouseTestTrendsCaching.test_insight_trends_merging_skipped_interval - ''' - /* user_id:0 request:_snapshot_ */ - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-14 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total - FROM - (SELECT sum(total) AS count, - day_start AS day_start - FROM - (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start - FROM events AS e SAMPLE 1 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM 
person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-14 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) - ORDER BY arraySum(total) DESC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: ClickhouseTestTrendsCaching.test_insight_trends_merging_skipped_interval.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-02 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-02 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-16 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total - FROM - (SELECT sum(total) AS count, - day_start AS day_start - FROM - (SELECT count(DISTINCT if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id)) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start - FROM events AS e SAMPLE 1 - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-02 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-16 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) - ORDER BY arraySum(total) DESC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: ClickhouseTestTrendsGroups.test_aggregating_by_group - ''' - /* user_id:0 request:_snapshot_ */ - SELECT arrayMap(number -> 
plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total - FROM - (SELECT sum(total) AS count, - day_start AS day_start - FROM - (SELECT count(DISTINCT e.`$group_0`) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview'), ifNull(notEquals(nullIf(nullIf(e.`$group_0`, ''), 'null'), ''), 1), notEquals(e.`$group_0`, '')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) - ORDER BY arraySum(total) DESC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: ClickhouseTestTrendsGroups.test_aggregating_by_group.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT $group_0 AS actor_id, - count() AS actor_value - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as "$group_0" - FROM events e - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-02 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-02 23:59:59', 'UTC') - AND (NOT has([''], "$group_0") - AND NOT has([''], "$group_0")) - AND "$group_0" != '' ) - GROUP BY actor_id - ORDER BY actor_value DESC, - actor_id DESC - LIMIT 100 - OFFSET 0 - ''' -# --- -# name: ClickhouseTestTrendsGroups.test_aggregating_by_session - ''' - /* user_id:0 request:_snapshot_ */ - SELECT arrayMap(number -> plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toIntervalDay(number)), range(0, plus(coalesce(dateDiff('day', toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))))), 1))) AS date, - arrayMap(_match_date -> arraySum(arraySlice(groupArray(ifNull(count, 0)), indexOf(groupArray(day_start) AS _days_for_count, _match_date) AS _index, plus(minus(arrayLastIndex(x -> ifNull(equals(x, _match_date), isNull(x) - and isNull(_match_date)), _days_for_count), _index), 1))), date) AS total - FROM - (SELECT sum(total) AS count, - day_start AS day_start - FROM - (SELECT count(DISTINCT e.`$session_id`) AS total, - toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start - FROM events AS e SAMPLE 1 - WHERE and(equals(e.team_id, 99999), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), 
assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, '$pageview')) - GROUP BY day_start) - GROUP BY day_start - ORDER BY day_start ASC) - ORDER BY arraySum(total) DESC - LIMIT 50000 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: ClickhouseTestTrendsGroups.test_aggregating_by_session.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT person_id AS actor_id, - count() AS actor_value - FROM - (SELECT e.timestamp as timestamp, - e."$session_id" as "$session_id", - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - e.distinct_id as distinct_id, - e.team_id as team_id - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-02 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-02 23:59:59', 'UTC')) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event = '$pageview' - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-02 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-02 23:59:59', 'UTC') ) - GROUP BY actor_id - ORDER BY actor_value DESC, - actor_id DESC - LIMIT 100 - OFFSET 0 - ''' -# --- diff --git a/ee/clickhouse/views/test/funnel/__init__.py b/ee/clickhouse/views/test/funnel/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/clickhouse/views/test/funnel/__snapshots__/test_clickhouse_funnel.ambr b/ee/clickhouse/views/test/funnel/__snapshots__/test_clickhouse_funnel.ambr deleted file mode 100644 index 3923f79e01..0000000000 --- a/ee/clickhouse/views/test/funnel/__snapshots__/test_clickhouse_funnel.ambr +++ /dev/null @@ -1,503 +0,0 @@ -# serializer version: 1 -# name: ClickhouseTestFunnelGroups.test_funnel_aggregation_with_groups - ''' - /* user_id:0 request:_snapshot_ */ - SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, - countIf(ifNull(equals(steps, 2), 0)) AS step_2, - avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, - step_1_conversion_time AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time - FROM - (SELECT aggregation_target AS 
aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.`$group_0` AS aggregation_target, - if(equals(e.event, 'user signed up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(notEquals(nullIf(nullIf(e.`$group_0`, ''), 'null'), ''), 1)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING ifNull(equals(steps, max(max_steps)), isNull(steps) - and isNull(max(max_steps)))) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 - ''' -# --- -# name: ClickhouseTestFunnelGroups.test_funnel_aggregation_with_groups.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND ((NOT has([''], "$group_0")) - AND (NOT has([''], "$group_0"))) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: ClickhouseTestFunnelGroups.test_funnel_group_aggregation_with_groups_entity_filtering - ''' - /* user_id:0 request:_snapshot_ */ - SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, - countIf(ifNull(equals(steps, 2), 
0)) AS step_2, - avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, - step_1_conversion_time AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.`$group_0` AS aggregation_target, - if(and(equals(e.event, 'user signed up'), ifNull(equals(nullIf(nullIf(e.`$group_0`, ''), 'null'), 'org:5'), 0)), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(notEquals(nullIf(nullIf(e.`$group_0`, ''), 'null'), ''), 1)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING ifNull(equals(steps, max(max_steps)), isNull(steps) - and isNull(max(max_steps)))) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 - ''' -# --- -# name: ClickhouseTestFunnelGroups.test_funnel_group_aggregation_with_groups_entity_filtering.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - 
latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up' - AND (has(['org:5'], "$group_0")), 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND ((NOT has([''], "$group_0")) - AND (NOT has([''], "$group_0"))) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: ClickhouseTestFunnelGroups.test_funnel_with_groups_entity_filtering - ''' - /* user_id:0 request:_snapshot_ */ - SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, - countIf(ifNull(equals(steps, 2), 0)) AS step_2, - avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, - step_1_conversion_time AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, - if(and(equals(e.event, 'user signed up'), ifNull(equals(nullIf(nullIf(e.`$group_0`, ''), 'null'), 'org:5'), 0)), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING 
ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING ifNull(equals(steps, max(max_steps)), isNull(steps) - and isNull(max(max_steps)))) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 - ''' -# --- -# name: ClickhouseTestFunnelGroups.test_funnel_with_groups_entity_filtering.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'user signed up' - AND (has(['org:5'], "$group_0")), 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: 
ClickhouseTestFunnelGroups.test_funnel_with_groups_global_filtering - ''' - /* user_id:0 request:_snapshot_ */ - SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, - countIf(ifNull(equals(steps, 2), 0)) AS step_2, - avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, - step_1_conversion_time AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, - if(equals(e.event, 'user signed up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, - groups.group_type_index AS index, - groups.group_key AS key - FROM groups - WHERE and(equals(groups.team_id, 99999), equals(index, 0)) - GROUP BY groups.group_type_index, - groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) - WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'finance'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING 
ifNull(equals(steps, max(max_steps)), isNull(steps) - and isNull(max(max_steps)))) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 - ''' -# --- -# name: ClickhouseTestFunnelGroups.test_funnel_with_groups_global_filtering.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND (has(['finance'], replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', ''))) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- diff --git a/ee/clickhouse/views/test/funnel/__snapshots__/test_clickhouse_funnel_person.ambr b/ee/clickhouse/views/test/funnel/__snapshots__/test_clickhouse_funnel_person.ambr deleted file mode 100644 index 7272f10b35..0000000000 --- a/ee/clickhouse/views/test/funnel/__snapshots__/test_clickhouse_funnel_person.ambr +++ /dev/null @@ -1,117 +0,0 @@ -# serializer version: 1 -# name: TestFunnelPerson.test_funnel_actors_with_groups_search - 
''' - /* user_id:0 request:_snapshot_ */ - SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time, - step_2_conversion_time - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 14 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - person.person_props as person_props, - if(event = 'step one', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'step two', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'step three', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2 - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['step one', 'step three', 'step two'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-10 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id, - argMax(properties, version) as person_props - FROM person - WHERE team_id = 99999 - GROUP BY id - HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['step one', 'step three', 'step two'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') - AND 
toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-10 23:59:59', 'UTC') - AND ((replaceRegexpAll(JSONExtractRaw(person_props, 'email'), '^"|"$', '') ILIKE '%g0%' - OR replaceRegexpAll(JSONExtractRaw(person_props, 'name'), '^"|"$', '') ILIKE '%g0%' - OR replaceRegexpAll(JSONExtractRaw(e.properties, 'distinct_id'), '^"|"$', '') ILIKE '%g0%' - OR replaceRegexpAll(JSONExtractRaw(group_properties_0, 'name'), '^"|"$', '') ILIKE '%g0%' - OR replaceRegexpAll(JSONExtractRaw(group_properties_0, 'slug'), '^"|"$', '') ILIKE '%g0%') - AND (NOT has([''], "$group_0"))) - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) )))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2, 3] - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- diff --git a/ee/clickhouse/views/test/funnel/__snapshots__/test_clickhouse_funnel_unordered.ambr b/ee/clickhouse/views/test/funnel/__snapshots__/test_clickhouse_funnel_unordered.ambr deleted file mode 100644 index fb6e9b5d83..0000000000 --- a/ee/clickhouse/views/test/funnel/__snapshots__/test_clickhouse_funnel_unordered.ambr +++ /dev/null @@ -1,172 +0,0 @@ -# serializer version: 1 -# name: ClickhouseTestUnorderedFunnelGroups.test_unordered_funnel_with_groups - ''' - /* user_id:0 request:_snapshot_ */ - SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, - countIf(ifNull(equals(steps, 2), 0)) AS step_2, - avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, - step_1_conversion_time AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - arraySort([latest_0, latest_1]) AS event_times, - arraySum([if(and(ifNull(less(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 1, 0), 1]) AS steps, - arraySort([latest_0, latest_1]) AS conversion_times, - if(and(isNotNull(conversion_times[2]), ifNull(lessOrEquals(conversion_times[2], plus(toTimeZone(conversion_times[1], 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', conversion_times[1], conversion_times[2]), NULL) AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.`$group_0` AS aggregation_target, - if(equals(e.event, 'user signed up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), 
lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(notEquals(nullIf(nullIf(e.`$group_0`, ''), 'null'), ''), 1)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) - WHERE ifNull(equals(step_0, 1), 0) - UNION ALL SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - arraySort([latest_0, latest_1]) AS event_times, - arraySum([if(and(ifNull(less(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 1, 0), 1]) AS steps, - arraySort([latest_0, latest_1]) AS conversion_times, - if(and(isNotNull(conversion_times[2]), ifNull(lessOrEquals(conversion_times[2], plus(toTimeZone(conversion_times[1], 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', conversion_times[1], conversion_times[2]), NULL) AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e.`$group_0` AS aggregation_target, - if(equals(e.event, 'paid'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'user signed up'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(notEquals(nullIf(nullIf(e.`$group_0`, ''), 'null'), ''), 1)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING ifNull(equals(steps, max(max_steps)), isNull(steps) - and isNull(max(max_steps)))) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 - ''' -# --- -# name: ClickhouseTestUnorderedFunnelGroups.test_unordered_funnel_with_groups.1 - ''' - /* user_id:0 request:_snapshot_ */ - SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time - FROM - (SELECT *, - arraySort([latest_0,latest_1]) as event_times, - arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 14 DAY, 1, 0), 1]) AS steps , - arraySort([latest_0,latest_1]) as conversion_times, - if(isNotNull(conversion_times[2]) - AND conversion_times[2] <= conversion_times[1] + INTERVAL 14 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over 
(PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'paid', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND ((NOT has([''], "$group_0")) - AND (NOT has([''], "$group_0"))) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 - UNION ALL SELECT *, - arraySort([latest_0,latest_1]) as event_times, - arraySum([if(latest_0 < latest_1 AND latest_1 <= latest_0 + INTERVAL 14 DAY, 1, 0), 1]) AS steps , - arraySort([latest_0,latest_1]) as conversion_times, - if(isNotNull(conversion_times[2]) - AND conversion_times[2] <= conversion_times[1] + INTERVAL 14 DAY, dateDiff('second', conversion_times[1], conversion_times[2]), NULL) step_1_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 - FROM - (SELECT e.timestamp as timestamp, - e."$group_0" as aggregation_target, - if(event = 'paid', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'user signed up', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1 - FROM events e - WHERE team_id = 99999 - AND event IN ['paid', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-14 23:59:59', 'UTC') - AND ((NOT has([''], "$group_0")) - AND (NOT has([''], "$group_0"))) - AND (step_0 = 1 - OR step_1 = 1) )) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2] - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- diff --git a/ee/clickhouse/views/test/funnel/test_clickhouse_funnel.py b/ee/clickhouse/views/test/funnel/test_clickhouse_funnel.py deleted file mode 100644 index 6bedb7cb2d..0000000000 --- a/ee/clickhouse/views/test/funnel/test_clickhouse_funnel.py +++ /dev/null @@ -1,272 +0,0 @@ -import json -from datetime import datetime - -from ee.api.test.base import LicensedTestMixin -from ee.clickhouse.views.test.funnel.util import ( - EventPattern, - FunnelRequest, - get_funnel_ok, -) -from posthog.constants import INSIGHT_FUNNELS -from posthog.models.group.util import create_group -from posthog.models.group_type_mapping import GroupTypeMapping -from posthog.test.base import ( - APIBaseTest, - ClickhouseTestMixin, - snapshot_clickhouse_queries, -) -from posthog.test.test_journeys import journeys_for - - -class ClickhouseTestFunnelGroups(ClickhouseTestMixin, LicensedTestMixin, APIBaseTest): - maxDiff = None - CLASS_DATA_LEVEL_SETUP = False - - def _create_groups(self): - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="organization", group_type_index=0 - ) - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="company", group_type_index=1 - ) - - create_group( - team_id=self.team.pk, - group_type_index=0, - 
group_key="org:5", - properties={"industry": "finance"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:6", - properties={"industry": "technology"}, - ) - - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key="company:1", - properties={}, - ) - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key="company:2", - properties={}, - ) - - @snapshot_clickhouse_queries - def test_funnel_aggregation_with_groups(self): - self._create_groups() - - events_by_person = { - "user_1": [ - { - "event": "user signed up", - "timestamp": datetime(2020, 1, 2, 14), - "properties": {"$group_0": "org:5"}, - }, - { - "event": "user signed up", # same person, different group, so should count as different step 1 in funnel - "timestamp": datetime(2020, 1, 10, 14), - "properties": {"$group_0": "org:6"}, - }, - ], - "user_2": [ - { # different person, same group, so should count as step two in funnel - "event": "paid", - "timestamp": datetime(2020, 1, 3, 14), - "properties": {"$group_0": "org:5"}, - } - ], - } - journeys_for(events_by_person, self.team) - - params = FunnelRequest( - events=json.dumps( - [ - EventPattern(id="user signed up", type="events", order=0), - EventPattern(id="paid", type="events", order=1), - ] - ), - date_from="2020-01-01", - date_to="2020-01-14", - aggregation_group_type_index=0, - insight=INSIGHT_FUNNELS, - ) - - result = get_funnel_ok(self.client, self.team.pk, params) - - assert result["user signed up"]["count"] == 2 - assert result["paid"]["count"] == 1 - assert result["paid"]["average_conversion_time"] == 86400 - - @snapshot_clickhouse_queries - def test_funnel_group_aggregation_with_groups_entity_filtering(self): - self._create_groups() - - events_by_person = { - "user_1": [ - { - "event": "user signed up", - "timestamp": datetime(2020, 1, 2, 14), - "properties": {"$group_0": "org:5"}, - } - ], - "user_2": [ - { # different person, same group, so should count as step two in funnel - "event": "paid", - "timestamp": datetime(2020, 1, 3, 14), - "properties": {"$group_0": "org:5"}, - } - ], - "user_3": [ - { # different person, different group, so should be discarded from step 1 in funnel - "event": "user signed up", - "timestamp": datetime(2020, 1, 10, 14), - "properties": {"$group_0": "org:6"}, - } - ], - } - journeys_for(events_by_person, self.team) - - params = FunnelRequest( - events=json.dumps( - [ - EventPattern( - id="user signed up", - type="events", - order=0, - properties={"$group_0": "org:5"}, - ), - EventPattern(id="paid", type="events", order=1), - ] - ), - date_from="2020-01-01", - date_to="2020-01-14", - aggregation_group_type_index=0, - insight=INSIGHT_FUNNELS, - ) - - result = get_funnel_ok(self.client, self.team.pk, params) - - assert result["user signed up"]["count"] == 1 - assert result["paid"]["count"] == 1 - assert result["paid"]["average_conversion_time"] == 86400 - - @snapshot_clickhouse_queries - def test_funnel_with_groups_entity_filtering(self): - self._create_groups() - - events_by_person = { - "user_1": [ - { - "event": "user signed up", - "timestamp": datetime(2020, 1, 2, 14), - "properties": {"$group_0": "org:5"}, - }, - { - "event": "paid", - "timestamp": datetime(2020, 1, 3, 14), - "properties": { - "$group_0": "org:6" - }, # different group, but doesn't matter since not aggregating by groups - }, - { - "event": "user signed up", # event belongs to different group, so shouldn't enter funnel - "timestamp": datetime(2020, 1, 2, 14), - "properties": {"$group_0": "org:6"}, - }, - { - 
"event": "paid", - "timestamp": datetime(2020, 1, 3, 14), - "properties": {"$group_0": "org:6"}, # event belongs to different group, so shouldn't enter funnel - }, - ] - } - journeys_for(events_by_person, self.team) - - params = FunnelRequest( - events=json.dumps( - [ - EventPattern( - id="user signed up", - type="events", - order=0, - properties={"$group_0": "org:5"}, - ), - EventPattern(id="paid", type="events", order=1), - ] - ), - date_from="2020-01-01", - date_to="2020-01-14", - insight=INSIGHT_FUNNELS, - ) - - result = get_funnel_ok(self.client, self.team.pk, params) - - assert result["user signed up"]["count"] == 1 - assert result["paid"]["count"] == 1 - assert result["paid"]["average_conversion_time"] == 86400 - - @snapshot_clickhouse_queries - def test_funnel_with_groups_global_filtering(self): - self._create_groups() - - events_by_person = { - "user_1": [ - { - "event": "user signed up", - "timestamp": datetime(2020, 1, 2, 14), - "properties": {"$group_0": "org:5"}, - }, - { - "event": "paid", - "timestamp": datetime(2020, 1, 3, 14), - "properties": { - "$group_0": "org:6" - }, # second event belongs to different group, so shouldn't complete funnel - }, - ], - "user_2": [ - { - "event": "user signed up", # event belongs to different group, so shouldn't enter funnel - "timestamp": datetime(2020, 1, 2, 14), - "properties": {"$group_0": "org:6"}, - }, - { - "event": "paid", - "timestamp": datetime(2020, 1, 3, 14), - "properties": {"$group_0": "org:5"}, # same group, but different person, so not in funnel - }, - ], - } - journeys_for(events_by_person, self.team) - - params = FunnelRequest( - events=json.dumps( - [ - EventPattern(id="user signed up", type="events", order=0), - EventPattern(id="paid", type="events", order=1), - ] - ), - date_from="2020-01-01", - date_to="2020-01-14", - insight=INSIGHT_FUNNELS, - properties=json.dumps( - [ - { - "key": "industry", - "value": "finance", - "type": "group", - "group_type_index": 0, - } - ] - ), - ) - - result = get_funnel_ok(self.client, self.team.pk, params) - - assert result["user signed up"]["count"] == 1 - assert result["paid"]["count"] == 0 diff --git a/ee/clickhouse/views/test/funnel/test_clickhouse_funnel_correlation.py b/ee/clickhouse/views/test/funnel/test_clickhouse_funnel_correlation.py deleted file mode 100644 index df6e9311f0..0000000000 --- a/ee/clickhouse/views/test/funnel/test_clickhouse_funnel_correlation.py +++ /dev/null @@ -1,704 +0,0 @@ -import json -from datetime import datetime -from unittest.mock import ANY - -import pytest -from django.core.cache import cache -from freezegun import freeze_time - -from ee.clickhouse.views.test.funnel.util import ( - EventPattern, - FunnelCorrelationRequest, - get_funnel_correlation, - get_funnel_correlation_ok, - get_people_for_correlation_ok, -) -from posthog.constants import FunnelCorrelationType -from posthog.models.element import Element -from posthog.models.team import Team -from posthog.test.base import BaseTest, _create_event, _create_person -from posthog.test.test_journeys import journeys_for - - -@pytest.mark.clickhouse_only -class FunnelCorrelationTest(BaseTest): - """ - Tests for /api/projects/:project_id/funnel/correlation/ - """ - - CLASS_DATA_LEVEL_SETUP = False - - def test_requires_authn(self): - response = get_funnel_correlation( - client=self.client, - team_id=self.team.pk, - request=FunnelCorrelationRequest(date_to="2020-04-04", events=json.dumps([])), - ) - assert response.status_code == 403 - assert response.json() == self.unauthenticated_response() - - def 
test_event_correlation_endpoint_picks_up_events_for_odds_ratios(self): - with freeze_time("2020-01-01"): - self.client.force_login(self.user) - - # Add in two people: - # - # Person 1 - a single signup event - # Person 2 - a signup event and a view insights event - # - # Both of them have a "watched video" event - # - # We then create Person 3, who completes the funnel - # successfully but has not watched the video. - # - # So our contingency table for "watched video" should be - # - # | | success | failure | total | - # | ---------------- | -------- | -------- | -------- | - # | watched | 1 | 1 | 2 | - # | did not watch | 1 | 0 | 1 | - # | total | 2 | 1 | 3 | - # - # For calculating the odds ratio, we add a prior count of 1 to everything - # - # So our odds ratio should be - # ((success + prior) / (failure + prior)) * ((failure_total - failure + prior) / (success_total - success + prior)) - # = ((1 + 1) / (1 + 1)) * ((1 - 1 + 1) / (2 - 1 + 1)) - # = 1 / 2 - - events = { - "Person 1": [ - # Failure / watched - {"event": "signup", "timestamp": datetime(2020, 1, 1)}, - {"event": "watched video", "timestamp": datetime(2020, 1, 2)}, - ], - "Person 2": [ - # Success / watched - {"event": "signup", "timestamp": datetime(2020, 1, 1)}, - {"event": "watched video", "timestamp": datetime(2020, 1, 2)}, - {"event": "view insights", "timestamp": datetime(2020, 1, 3)}, - ], - "Person 3": [ - # Success / did not watch - {"event": "signup", "timestamp": datetime(2020, 1, 1)}, - {"event": "view insights", "timestamp": datetime(2020, 1, 3)}, - ], - } - - journeys_for(events_by_person=events, team=self.team) - - odds = get_funnel_correlation_ok( - client=self.client, - team_id=self.team.pk, - request=FunnelCorrelationRequest( - events=json.dumps([EventPattern(id="signup"), EventPattern(id="view insights")]), - date_to="2020-04-04", - ), - ) - - assert odds == { - "is_cached": False, - "last_refresh": "2020-01-01T00:00:00Z", - "result": { - "events": [ - { - "event": { - "event": "watched video", - "elements": [], - "properties": {}, - }, - "failure_count": 1, - "success_count": 1, - "success_people_url": ANY, - "failure_people_url": ANY, - "odds_ratio": 1 / 2, - "correlation_type": "failure", - } - ], - "skewed": False, - }, - "query_method": "hogql", - } - - def test_event_correlation_is_partitioned_by_team(self): - """ - Ensure there's no crosstalk between teams - - We check this by: - - 1. loading events into team 1 - 2. checking correlation for team 1 - 3. loading events into team 2 - 4. 
checking correlation for team 1 again, they should be the same - - """ - with freeze_time("2020-01-01"): - self.client.force_login(self.user) - - events = { - "Person 1": [ - {"event": "watched video", "timestamp": datetime(2019, 1, 2)}, - {"event": "signup", "timestamp": datetime(2020, 1, 1)}, - ], - "Person 2": [ - {"event": "watched video", "timestamp": datetime(2019, 1, 2)}, - {"event": "signup", "timestamp": datetime(2020, 1, 1)}, - {"event": "view insights", "timestamp": datetime(2020, 1, 3)}, - ], - } - - journeys_for(events_by_person=events, team=self.team) - - odds_before = get_funnel_correlation_ok( - client=self.client, - team_id=self.team.pk, - request=FunnelCorrelationRequest( - events=json.dumps([EventPattern(id="signup"), EventPattern(id="view insights")]), - date_to="2020-04-04", - ), - ) - - other_team = create_team(organization=self.organization) - journeys_for(events_by_person=events, team=other_team) - - # We need to make sure we clear the cache so we get the same results again - cache.clear() - - odds_after = get_funnel_correlation_ok( - client=self.client, - team_id=self.team.pk, - request=FunnelCorrelationRequest( - events=json.dumps([EventPattern(id="signup"), EventPattern(id="view insights")]), - date_to="2020-04-04", - ), - ) - - assert odds_before == odds_after - - def test_event_correlation_endpoint_does_not_include_historical_events(self): - with freeze_time("2020-01-01"): - self.client.force_login(self.user) - - # Add in two people: - # - # Person 1 - a single signup event - # Person 2 - a signup event and a view insights event - # - # Both of them have a "watched video" event but they are before the - # signup event - - events = { - "Person 1": [ - {"event": "watched video", "timestamp": datetime(2019, 1, 2)}, - {"event": "signup", "timestamp": datetime(2020, 1, 1)}, - ], - "Person 2": [ - {"event": "watched video", "timestamp": datetime(2019, 1, 2)}, - {"event": "signup", "timestamp": datetime(2020, 1, 1)}, - {"event": "view insights", "timestamp": datetime(2020, 1, 3)}, - ], - } - - journeys_for(events_by_person=events, team=self.team) - - # We need to make sure we clear the cache so that other tests that - # have run don't interfere with this test - cache.clear() - - odds = get_funnel_correlation_ok( - client=self.client, - team_id=self.team.pk, - request=FunnelCorrelationRequest( - events=json.dumps([EventPattern(id="signup"), EventPattern(id="view insights")]), - date_to="2020-04-04", - ), - ) - - assert odds == { - "is_cached": False, - "last_refresh": "2020-01-01T00:00:00Z", - "result": {"events": [], "skewed": False}, - "query_method": "hogql", - } - - def test_event_correlation_endpoint_does_not_include_funnel_steps(self): - with freeze_time("2020-01-01"): - self.client.force_login(self.user) - - # Add Person 1 with only the funnel steps involved - - events = { - "Person 1": [ - {"event": "signup", "timestamp": datetime(2020, 1, 1)}, - {"event": "some waypoint", "timestamp": datetime(2020, 1, 2)}, - {"event": "", "timestamp": datetime(2020, 1, 3)}, - ], - # We need at least 1 success and failure to return a result - "Person 2": [ - {"event": "signup", "timestamp": datetime(2020, 1, 1)}, - {"event": "some waypoint", "timestamp": datetime(2020, 1, 2)}, - {"event": "view insights", "timestamp": datetime(2020, 1, 3)}, - ], - } - # '' is a weird event name to have, but if it exists, it's our duty to report it - - journeys_for(events_by_person=events, team=self.team) - - # We need to make sure we clear the cache so that other tests that - # have run don't interfere with this test 
- cache.clear() - - odds = get_funnel_correlation_ok( - client=self.client, - team_id=self.team.pk, - request=FunnelCorrelationRequest( - events=json.dumps( - [ - EventPattern(id="signup"), - EventPattern(id="some waypoint"), - EventPattern(id="view insights"), - ] - ), - date_to="2020-04-04", - ), - ) - - assert odds == { - "is_cached": False, - "last_refresh": "2020-01-01T00:00:00Z", - "result": { - "events": [ - { - "correlation_type": "failure", - "event": {"event": "", "elements": [], "properties": {}}, - "failure_count": 1, - "odds_ratio": 1 / 4, - "success_count": 0, - "success_people_url": ANY, - "failure_people_url": ANY, - } - ], - "skewed": False, - }, - "query_method": "hogql", - } - - def test_events_correlation_endpoint_provides_people_drill_down_urls(self): - """ - Here we are setting up three users, and looking to retrieve one - correlation for watched video, with a url we can use to retrieve people - that successfully completed the funnel AND watched the video, and - another for people that did not complete the funnel but also watched the - video. - """ - - with freeze_time("2020-01-01"): - self.client.force_login(self.user) - - events = { - "Person 1": [ - # Failure / watched - {"event": "signup", "timestamp": datetime(2020, 1, 1)}, - {"event": "watched video", "timestamp": datetime(2020, 1, 2)}, - ], - "Person 2": [ - # Success / watched - {"event": "signup", "timestamp": datetime(2020, 1, 1)}, - {"event": "watched video", "timestamp": datetime(2020, 1, 2)}, - {"event": "view insights", "timestamp": datetime(2020, 1, 3)}, - ], - "Person 3": [ - # Success / did not watched. We don't expect to retrieve - # this one as part of the - {"event": "signup", "timestamp": datetime(2020, 1, 1)}, - {"event": "view insights", "timestamp": datetime(2020, 1, 3)}, - ], - } - - journeys_for(events_by_person=events, team=self.team) - - odds = get_funnel_correlation_ok( - client=self.client, - team_id=self.team.pk, - request=FunnelCorrelationRequest( - events=json.dumps([EventPattern(id="signup"), EventPattern(id="view insights")]), - date_to="2020-04-04", - ), - ) - - assert odds["result"]["events"][0]["event"]["event"] == "watched video" - watched_video_correlation = odds["result"]["events"][0] - - assert get_people_for_correlation_ok(client=self.client, correlation=watched_video_correlation) == { - "success": ["Person 2"], - "failure": ["Person 1"], - } - - def test_events_with_properties_correlation_endpoint_provides_people_drill_down_urls(self): - with freeze_time("2020-01-01"): - self.client.force_login(self.user) - - events = { - "Person 1": [ - # Failure / watched - {"event": "signup", "timestamp": datetime(2020, 1, 1)}, - { - "event": "watched video", - "properties": {"$browser": "1"}, - "timestamp": datetime(2020, 1, 2), - }, - ], - "Person 2": [ - # Success / watched - {"event": "signup", "timestamp": datetime(2020, 1, 1)}, - { - "event": "watched video", - "properties": {"$browser": "1"}, - "timestamp": datetime(2020, 1, 2), - }, - {"event": "view insights", "timestamp": datetime(2020, 1, 3)}, - ], - "Person 3": [ - # Success / watched. We need to have three event instances - # for this test otherwise the endpoint doesn't return results - {"event": "signup", "timestamp": datetime(2020, 1, 1)}, - { - "event": "watched video", - "properties": {"$browser": "1"}, - "timestamp": datetime(2020, 1, 2), - }, - {"event": "view insights", "timestamp": datetime(2020, 1, 3)}, - ], - "Person 4": [ - # Success / didn't watch. 
Want to use this user to verify - # that we don't pull in unrelated users erroneously - {"event": "signup", "timestamp": datetime(2020, 1, 1)}, - {"event": "view insights", "timestamp": datetime(2020, 1, 3)}, - ], - } - - journeys_for(events_by_person=events, team=self.team) - - odds = get_funnel_correlation_ok( - client=self.client, - team_id=self.team.pk, - request=FunnelCorrelationRequest( - funnel_correlation_type=FunnelCorrelationType.EVENT_WITH_PROPERTIES, - funnel_correlation_event_names=json.dumps(["watched video"]), - events=json.dumps([EventPattern(id="signup"), EventPattern(id="view insights")]), - date_to="2020-04-04", - ), - ) - - assert odds["result"]["events"][0]["event"]["event"] == "watched video::$browser::1" - watched_video_correlation = odds["result"]["events"][0] - - assert get_people_for_correlation_ok(client=self.client, correlation=watched_video_correlation) == { - "success": ["Person 2", "Person 3"], - "failure": ["Person 1"], - } - - def test_correlation_endpoint_with_properties(self): - self.client.force_login(self.user) - - for i in range(10): - _create_person( - distinct_ids=[f"user_{i}"], - team_id=self.team.pk, - properties={"$browser": "Positive"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - ) - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_{i}", - timestamp="2020-01-04T14:00:00Z", - ) - - for i in range(10, 20): - _create_person( - distinct_ids=[f"user_{i}"], - team_id=self.team.pk, - properties={"$browser": "Negative"}, - ) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - ) - if i % 2 == 0: - _create_event( - team=self.team, - event="negatively_related", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - ) - - # We need to make sure we clear the cache other tests that have run - # done interfere with this test - cache.clear() - - api_response = get_funnel_correlation_ok( - client=self.client, - team_id=self.team.pk, - request=FunnelCorrelationRequest( - events=json.dumps([EventPattern(id="user signed up"), EventPattern(id="paid")]), - date_to="2020-01-14", - date_from="2020-01-01", - funnel_correlation_type=FunnelCorrelationType.PROPERTIES, - funnel_correlation_names=json.dumps(["$browser"]), - ), - ) - - self.assertFalse(api_response["result"]["skewed"]) - - result = api_response["result"]["events"] - - odds_ratios = [item.pop("odds_ratio") for item in result] - expected_odds_ratios = [121, 1 / 121] - - for odds, expected_odds in zip(odds_ratios, expected_odds_ratios): - self.assertAlmostEqual(odds, expected_odds) - - self.assertEqual( - result, - [ - { - "event": { - "event": "$browser::Positive", - "elements": [], - "properties": {}, - }, - "success_count": 10, - "failure_count": 0, - "success_people_url": ANY, - "failure_people_url": ANY, - # "odds_ratio": 121.0, - "correlation_type": "success", - }, - { - "event": { - "event": "$browser::Negative", - "elements": [], - "properties": {}, - }, - "success_count": 0, - "failure_count": 10, - "success_people_url": ANY, - "failure_people_url": ANY, - # "odds_ratio": 1 / 121, - "correlation_type": "failure", - }, - ], - ) - - def test_properties_correlation_endpoint_provides_people_drill_down_urls(self): - """ - Here we are setting up three users, two with a specified property but - differing values, and one with this property absent. 
We expect to be - able to use the correlation people drill down urls to retrieve the - associated people for each. - """ - - with freeze_time("2020-01-01"): - self.client.force_login(self.user) - - _create_person( - distinct_ids=["Person 1"], - team_id=self.team.pk, - properties={"$browser": "1"}, - ) - _create_person( - distinct_ids=["Person 2"], - team_id=self.team.pk, - properties={"$browser": "1"}, - ) - _create_person( - distinct_ids=["Person 3"], - team_id=self.team.pk, - properties={}, - ) - - events = { - "Person 1": [ - # Failure / $browser::1 - {"event": "signup", "timestamp": datetime(2020, 1, 1)} - ], - "Person 2": [ - # Success / $browser::1 - {"event": "signup", "timestamp": datetime(2020, 1, 1)}, - {"event": "view insights", "timestamp": datetime(2020, 1, 3)}, - ], - "Person 3": [ - # Success / $browser not set - {"event": "signup", "timestamp": datetime(2020, 1, 1)}, - {"event": "view insights", "timestamp": datetime(2020, 1, 3)}, - ], - } - - journeys_for(events_by_person=events, team=self.team, create_people=False) - - odds = get_funnel_correlation_ok( - client=self.client, - team_id=self.team.pk, - request=FunnelCorrelationRequest( - events=json.dumps([EventPattern(id="signup"), EventPattern(id="view insights")]), - date_to="2020-04-04", - funnel_correlation_type=FunnelCorrelationType.PROPERTIES, - funnel_correlation_names=json.dumps(["$browser"]), - ), - ) - - (browser_correlation,) = ( - correlation - for correlation in odds["result"]["events"] - if correlation["event"]["event"] == "$browser::1" - ) - - (notset_correlation,) = ( - correlation for correlation in odds["result"]["events"] if correlation["event"]["event"] == "$browser::" - ) - - assert get_people_for_correlation_ok(client=self.client, correlation=browser_correlation) == { - "success": ["Person 2"], - "failure": ["Person 1"], - } - - assert get_people_for_correlation_ok(client=self.client, correlation=notset_correlation) == { - "success": ["Person 3"], - "failure": [], - } - - def test_correlation_endpoint_request_with_no_steps_doesnt_fail(self): - """ - This just checks that we get an empty result; this mimics what happens - with other insight endpoints. It's questionable whether this should perhaps - be a 400 instead. 
- """ - self.client.force_login(self.user) - - with freeze_time("2020-01-01"): - response = get_funnel_correlation_ok( - client=self.client, - team_id=self.team.pk, - request=FunnelCorrelationRequest( - events=json.dumps([]), - date_to="2020-01-14", - date_from="2020-01-01", - funnel_correlation_type=FunnelCorrelationType.PROPERTIES, - funnel_correlation_names=json.dumps(["$browser"]), - ), - ) - - assert response == { - "is_cached": False, - "last_refresh": "2020-01-01T00:00:00Z", - "result": {"events": [], "skewed": False}, - "query_method": "hogql", - } - - def test_funnel_correlation_with_event_properties_autocapture(self): - self.client.force_login(self.user) - - # Need a minimum of 3 hits to get a correlation result - for i in range(3): - _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - ) - _create_event( - team=self.team, - event="$autocapture", - distinct_id=f"user_{i}", - elements=[Element(nth_of_type=1, nth_child=0, tag_name="a", href="/movie")], - timestamp="2020-01-03T14:00:00Z", - properties={"signup_source": "email", "$event_type": "click"}, - ) - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_{i}", - timestamp="2020-01-04T14:00:00Z", - ) - - # Atleast one person that fails, to ensure we get results - _create_person(distinct_ids=[f"user_fail"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_fail", - timestamp="2020-01-02T14:00:00Z", - ) - - with freeze_time("2020-01-01"): - response = get_funnel_correlation_ok( - client=self.client, - team_id=self.team.pk, - request=FunnelCorrelationRequest( - events=json.dumps([EventPattern(id="user signed up"), EventPattern(id="paid")]), - date_to="2020-01-14", - date_from="2020-01-01", - funnel_correlation_type=FunnelCorrelationType.EVENT_WITH_PROPERTIES, - funnel_correlation_event_names=json.dumps(["$autocapture"]), - ), - ) - - assert response == { - "result": { - "events": [ - { - "success_count": 3, - "failure_count": 0, - "success_people_url": ANY, - "failure_people_url": ANY, - "odds_ratio": 8.0, - "correlation_type": "success", - "event": { - "event": '$autocapture::elements_chain::click__~~__a:href="/movie"nth-child="0"nth-of-type="1"', - "properties": {"$event_type": "click"}, - "elements": [ - { - "event": None, - "text": None, - "tag_name": "a", - "attr_class": None, - "href": "/movie", - "attr_id": None, - "nth_child": 0, - "nth_of_type": 1, - "attributes": {}, - "order": 0, - } - ], - }, - } - ], - "skewed": False, - }, - "last_refresh": "2020-01-01T00:00:00Z", - "is_cached": False, - "query_method": "hogql", - } - - assert get_people_for_correlation_ok(client=self.client, correlation=response["result"]["events"][0]) == { - "success": ["user_0", "user_1", "user_2"], - "failure": [], - } - - -@pytest.fixture(autouse=True) -def clear_django_cache(): - cache.clear() - - -def create_team(organization): - return Team.objects.create(name="Test Team", organization=organization) diff --git a/ee/clickhouse/views/test/funnel/test_clickhouse_funnel_person.py b/ee/clickhouse/views/test/funnel/test_clickhouse_funnel_person.py deleted file mode 100644 index a230df5918..0000000000 --- a/ee/clickhouse/views/test/funnel/test_clickhouse_funnel_person.py +++ /dev/null @@ -1,415 +0,0 @@ -import json -from unittest.mock import patch - -from django.core.cache import cache -from rest_framework import status - -from 
posthog.constants import INSIGHT_FUNNELS -from posthog.models.group.util import create_group -from posthog.models.instance_setting import get_instance_setting -from posthog.models.person import Person -from posthog.test.base import ( - APIBaseTest, - ClickhouseTestMixin, - _create_event, - _create_person, - snapshot_clickhouse_queries, -) - - -class TestFunnelPerson(ClickhouseTestMixin, APIBaseTest): - def _create_sample_data(self, num, delete=False): - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="g0", - properties={"slug": "g0", "name": "g0"}, - ) - - for i in range(num): - if delete: - person = Person.objects.create(distinct_ids=[f"user_{i}"], team=self.team) - else: - _create_person(distinct_ids=[f"user_{i}"], team=self.team) - _create_event( - event="step one", - distinct_id=f"user_{i}", - team=self.team, - timestamp="2021-05-01 00:00:00", - properties={"$browser": "Chrome", "$group_0": "g0"}, - ) - _create_event( - event="step two", - distinct_id=f"user_{i}", - team=self.team, - timestamp="2021-05-03 00:00:00", - properties={"$browser": "Chrome", "$group_0": "g0"}, - ) - _create_event( - event="step three", - distinct_id=f"user_{i}", - team=self.team, - timestamp="2021-05-05 00:00:00", - properties={"$browser": "Chrome", "$group_0": "g0"}, - ) - if delete: - person.delete() - - def test_basic_format(self): - self._create_sample_data(5) - request_data = { - "insight": INSIGHT_FUNNELS, - "interval": "day", - "actions": json.dumps([]), - "events": json.dumps( - [ - {"id": "step one", "order": 0}, - {"id": "step two", "order": 1}, - {"id": "step three", "order": 2}, - ] - ), - "properties": json.dumps([]), - "funnel_window_days": 14, - "funnel_step": 1, - "filter_test_accounts": "false", - "new_entity": json.dumps([]), - "date_from": "2021-05-01", - "date_to": "2021-05-10", - } - - response = self.client.get("/api/person/funnel/", data=request_data) - self.assertEqual(response.status_code, status.HTTP_200_OK) - j = response.json() - first_person = j["results"][0]["people"][0] - self.assertEqual(5, len(j["results"][0]["people"])) - self.assertTrue("id" in first_person and "name" in first_person and "distinct_ids" in first_person) - self.assertEqual(5, j["results"][0]["count"]) - - @snapshot_clickhouse_queries - def test_funnel_actors_with_groups_search(self): - self._create_sample_data(5) - - request_data = { - "aggregation_group_type_index": 0, - "search": "g0", - "breakdown_attribution_type": "first_touch", - "insight": INSIGHT_FUNNELS, - "interval": "day", - "actions": json.dumps([]), - "events": json.dumps( - [ - {"id": "step one", "order": 0}, - {"id": "step two", "order": 1}, - {"id": "step three", "order": 2}, - ] - ), - "properties": json.dumps([]), - "funnel_window_days": 14, - "funnel_step": 1, - "filter_test_accounts": "false", - "new_entity": json.dumps([]), - "date_from": "2021-05-01", - "date_to": "2021-05-10", - } - - response = self.client.get("/api/person/funnel/", data=request_data) - self.assertEqual(response.status_code, status.HTTP_200_OK) - j = response.json() - self.assertEqual(1, len(j["results"][0]["people"])) - self.assertEqual(1, j["results"][0]["count"]) - - def test_basic_pagination(self): - cache.clear() - self._create_sample_data(110) - request_data = { - "insight": INSIGHT_FUNNELS, - "interval": "day", - "actions": json.dumps([]), - "events": json.dumps( - [ - {"id": "step one", "order": 0}, - {"id": "step two", "order": 1}, - {"id": "step three", "order": 2}, - ] - ), - "properties": json.dumps([]), - "funnel_window_days": 14, - 
"funnel_step": 1, - "filter_test_accounts": "false", - "new_entity": json.dumps([]), - "date_from": "2021-05-01", - "date_to": "2021-05-10", - } - - response = self.client.get("/api/person/funnel/", data=request_data) - self.assertEqual(response.status_code, status.HTTP_200_OK) - j = response.json() - people = j["results"][0]["people"] - next = j["next"] - self.assertEqual(100, len(people)) - self.assertNotEqual(None, next) - - response = self.client.get(next) - self.assertEqual(response.status_code, status.HTTP_200_OK) - j = response.json() - people = j["results"][0]["people"] - next = j["next"] - self.assertEqual(10, len(people)) - self.assertEqual(None, j["next"]) - - def test_breakdown_basic_pagination(self): - cache.clear() - self._create_sample_data(110) - request_data = { - "insight": INSIGHT_FUNNELS, - "interval": "day", - "actions": json.dumps([]), - "events": json.dumps( - [ - {"id": "step one", "order": 0}, - {"id": "step two", "order": 1}, - {"id": "step three", "order": 2}, - ] - ), - "properties": json.dumps([]), - "funnel_window_days": 14, - "funnel_step": 1, - "filter_test_accounts": "false", - "new_entity": json.dumps([]), - "date_from": "2021-05-01", - "date_to": "2021-05-10", - "breakdown_type": "event", - "breakdown": "$browser", - "funnel_step_breakdown": "Chrome", - } - - response = self.client.get("/api/person/funnel/", data=request_data) - self.assertEqual(response.status_code, status.HTTP_200_OK) - j = response.json() - people = j["results"][0]["people"] - next = j["next"] - self.assertEqual(100, len(people)) - - response = self.client.get(next) - self.assertEqual(response.status_code, status.HTTP_200_OK) - j = response.json() - people = j["results"][0]["people"] - next = j["next"] - self.assertEqual(10, len(people)) - self.assertEqual(None, j["next"]) - - @patch("posthog.models.person.util.delete_person") - def test_basic_pagination_with_deleted(self, delete_person_patch): - if not get_instance_setting("PERSON_ON_EVENTS_ENABLED"): - return - - cache.clear() - self._create_sample_data(20, delete=True) - request_data = { - "insight": INSIGHT_FUNNELS, - "interval": "day", - "actions": json.dumps([]), - "events": json.dumps( - [ - {"id": "step one", "order": 0}, - {"id": "step two", "order": 1}, - {"id": "step three", "order": 2}, - ] - ), - "properties": json.dumps([]), - "funnel_window_days": 14, - "funnel_step": 1, - "filter_test_accounts": "false", - "new_entity": json.dumps([]), - "date_from": "2021-05-01", - "date_to": "2021-05-10", - "limit": 15, - } - - response = self.client.get("/api/person/funnel/", data=request_data) - self.assertEqual(response.status_code, status.HTTP_200_OK) - j = response.json() - people = j["results"][0]["people"] - next = j["next"] - missing_persons = j["missing_persons"] - self.assertEqual(0, len(people)) - self.assertEqual(15, missing_persons) - self.assertIsNotNone(next) - - response = self.client.get(next) - self.assertEqual(response.status_code, status.HTTP_200_OK) - j = response.json() - people = j["results"][0]["people"] - next = j["next"] - missing_persons = j["missing_persons"] - self.assertEqual(0, len(people)) - self.assertEqual(5, missing_persons) - self.assertIsNone(next) - - def test_breakdowns(self): - request_data = { - "insight": INSIGHT_FUNNELS, - "interval": "day", - "actions": json.dumps([]), - "properties": json.dumps([]), - "funnel_step": 1, - "filter_test_accounts": "false", - "new_entity": json.dumps([]), - "events": json.dumps( - [ - {"id": "sign up", "order": 0}, - {"id": "play movie", "order": 1}, - {"id": 
"buy", "order": 2}, - ] - ), - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-08", - "funnel_window_days": 7, - "breakdown": "$browser", - "funnel_step_breakdown": "Chrome", - } - - # event - _create_person(distinct_ids=["person1"], team_id=self.team.pk) - _create_event( - team=self.team, - event="sign up", - distinct_id="person1", - properties={"key": "val", "$browser": "Chrome"}, - timestamp="2020-01-01T12:00:00Z", - ) - _create_event( - team=self.team, - event="play movie", - distinct_id="person1", - properties={"key": "val", "$browser": "Chrome"}, - timestamp="2020-01-01T13:00:00Z", - ) - _create_event( - team=self.team, - event="buy", - distinct_id="person1", - properties={"key": "val", "$browser": "Chrome"}, - timestamp="2020-01-01T15:00:00Z", - ) - - _create_person(distinct_ids=["person2"], team_id=self.team.pk) - _create_event( - team=self.team, - event="sign up", - distinct_id="person2", - properties={"key": "val", "$browser": "Safari"}, - timestamp="2020-01-02T14:00:00Z", - ) - _create_event( - team=self.team, - event="play movie", - distinct_id="person2", - properties={"key": "val", "$browser": "Safari"}, - timestamp="2020-01-02T16:00:00Z", - ) - - _create_person(distinct_ids=["person3"], team_id=self.team.pk) - _create_event( - team=self.team, - event="sign up", - distinct_id="person3", - properties={"key": "val", "$browser": "Safari"}, - timestamp="2020-01-02T14:00:00Z", - ) - - response = self.client.get("/api/person/funnel/", data=request_data) - self.assertEqual(response.status_code, status.HTTP_200_OK) - j = response.json() - - people = j["results"][0]["people"] - self.assertEqual(1, len(people)) - self.assertEqual(None, j["next"]) - - response = self.client.get( - "/api/person/funnel/", - data={**request_data, "funnel_step_breakdown": "Safari"}, - ) - self.assertEqual(response.status_code, status.HTTP_200_OK) - j = response.json() - - people = j["results"][0]["people"] - self.assertEqual(2, len(people)) - self.assertEqual(None, j["next"]) - - -class TestFunnelCorrelationActors(ClickhouseTestMixin, APIBaseTest): - """ - Tests for /api/projects/:project_id/persons/funnel/correlation/ - """ - - def test_pagination(self): - cache.clear() - - for i in range(10): - _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk) - _create_event( - team=self.team, - event="user signed up", - distinct_id=f"user_{i}", - timestamp="2020-01-02T14:00:00Z", - ) - _create_event( - team=self.team, - event="positively_related", - distinct_id=f"user_{i}", - timestamp="2020-01-03T14:00:00Z", - ) - _create_event( - team=self.team, - event="paid", - distinct_id=f"user_{i}", - timestamp="2020-01-04T14:00:00Z", - ) - - request_data = { - "events": json.dumps( - [ - {"id": "user signed up", "type": "events", "order": 0}, - {"id": "paid", "type": "events", "order": 1}, - ] - ), - "insight": INSIGHT_FUNNELS, - "date_from": "2020-01-01", - "date_to": "2020-01-14", - "funnel_correlation_type": "events", - "funnel_correlation_person_converted": "true", - "funnel_correlation_person_limit": 4, - "funnel_correlation_person_entity": json.dumps({"id": "positively_related", "type": "events"}), - } - - response = self.client.get( - f"/api/projects/{self.team.pk}/persons/funnel/correlation", - data=request_data, - ) - self.assertEqual(response.status_code, status.HTTP_200_OK) - j = response.json() - - first_person = j["results"][0]["people"][0] - self.assertEqual(4, len(j["results"][0]["people"])) - self.assertTrue("id" in first_person and "name" in first_person and 
"distinct_ids" in first_person) - self.assertEqual(4, j["results"][0]["count"]) - - next = j["next"] - response = self.client.get(next) - self.assertEqual(response.status_code, status.HTTP_200_OK) - j = response.json() - - people = j["results"][0]["people"] - next = j["next"] - self.assertEqual(4, len(people)) - self.assertNotEqual(None, next) - - response = self.client.get(next) - self.assertEqual(response.status_code, status.HTTP_200_OK) - j = response.json() - people = j["results"][0]["people"] - next = j["next"] - self.assertEqual(2, len(people)) - self.assertEqual(None, j["next"]) diff --git a/ee/clickhouse/views/test/funnel/test_clickhouse_funnel_trends_person.py b/ee/clickhouse/views/test/funnel/test_clickhouse_funnel_trends_person.py deleted file mode 100644 index 3459e4bf13..0000000000 --- a/ee/clickhouse/views/test/funnel/test_clickhouse_funnel_trends_person.py +++ /dev/null @@ -1,279 +0,0 @@ -import json - -from rest_framework import status - -from posthog.constants import INSIGHT_FUNNELS, FunnelOrderType, FunnelVizType -from posthog.test.base import ( - APIBaseTest, - ClickhouseTestMixin, - _create_event, - _create_person, -) - - -class TestFunnelTrendsPerson(ClickhouseTestMixin, APIBaseTest): - def test_basic_format(self): - user_a = _create_person(distinct_ids=["user a"], team=self.team) - - _create_event( - event="step one", - distinct_id="user a", - team=self.team, - timestamp="2021-06-07 19:00:00", - ) - - common_request_data = { - "insight": INSIGHT_FUNNELS, - "funnel_viz_type": FunnelVizType.TRENDS, - "interval": "day", - "date_from": "2021-06-07", - "date_to": "2021-06-13 23:59:59", - "funnel_window_days": 7, - "events": json.dumps( - [ - {"id": "step one", "order": 0}, - {"id": "step two", "order": 1}, - {"id": "step three", "order": 2}, - ] - ), - "properties": json.dumps([]), - "funnel_window_days": 7, - "new_entity": json.dumps([]), - } - - # 1 user who dropped off starting 2021-06-07 - response_1 = self.client.get( - "/api/person/funnel/", - data={ - **common_request_data, - "entrance_period_start": "2021-06-07", - "drop_off": True, - }, - ) - response_1_data = response_1.json() - - self.assertEqual(response_1.status_code, status.HTTP_200_OK) - self.assertEqual( - [person["id"] for person in response_1_data["results"][0]["people"]], - [str(user_a.uuid)], - ) - - # No users converted 2021-06-07 - response_2 = self.client.get( - "/api/person/funnel/", - data={ - **common_request_data, - "entrance_period_start": "2021-06-07 00:00", - "drop_off": False, - }, - ) - response_2_data = response_2.json() - - self.assertEqual(response_2.status_code, status.HTTP_200_OK) - self.assertEqual([person["id"] for person in response_2_data["results"][0]["people"]], []) - - # No users dropped off starting 2021-06-08 - response_3 = self.client.get( - "/api/person/funnel/", - data={ - **common_request_data, - "entrance_period_start": "2021-06-08", - "drop_off": True, - }, - ) - response_3_data = response_3.json() - - self.assertEqual(response_3.status_code, status.HTTP_200_OK) - self.assertEqual([person["id"] for person in response_3_data["results"][0]["people"]], []) - - def test_strict_order(self): - user_a = _create_person(distinct_ids=["user a"], team=self.team) - user_b = _create_person(distinct_ids=["user b"], team=self.team) - - _create_event( - event="step one", - distinct_id="user a", - team=self.team, - timestamp="2021-06-07 19:00:00", - ) - _create_event( - event="step two", - distinct_id="user a", - team=self.team, - timestamp="2021-06-07 19:00:01", - ) - _create_event( - 
event="step one", - distinct_id="user a", - team=self.team, - timestamp="2021-06-07 19:00:02", - ) - _create_event( - event="step three", - distinct_id="user a", - team=self.team, - timestamp="2021-06-07 19:00:03", - ) - - _create_event( - event="step one", - distinct_id="user b", - team=self.team, - timestamp="2021-06-07 19:00:00", - ) - _create_event( - event="step two", - distinct_id="user b", - team=self.team, - timestamp="2021-06-07 19:00:01", - ) - _create_event( - event="step three", - distinct_id="user b", - team=self.team, - timestamp="2021-06-07 19:00:03", - ) - - common_request_data = { - "insight": INSIGHT_FUNNELS, - "funnel_viz_type": FunnelVizType.TRENDS, - "interval": "day", - "date_from": "2021-06-07", - "date_to": "2021-06-13 23:59:59", - "funnel_window_days": 7, - "funnel_order_type": FunnelOrderType.STRICT, - "events": json.dumps( - [ - {"id": "step one", "order": 0}, - {"id": "step two", "order": 1}, - {"id": "step three", "order": 2}, - ] - ), - "properties": json.dumps([]), - "funnel_window_days": 7, - "new_entity": json.dumps([]), - } - - # 1 user who dropped off - response_1 = self.client.get( - "/api/person/funnel/", - data={ - **common_request_data, - "entrance_period_start": "2021-06-07", - "drop_off": True, - }, - ) - response_1_data = response_1.json() - - self.assertEqual(response_1.status_code, status.HTTP_200_OK) - self.assertEqual( - [person["id"] for person in response_1_data["results"][0]["people"]], - [str(user_a.uuid)], - ) - - # 1 user who successfully converted - response_1 = self.client.get( - "/api/person/funnel/", - data={ - **common_request_data, - "entrance_period_start": "2021-06-07", - "drop_off": False, - }, - ) - response_1_data = response_1.json() - - self.assertEqual(response_1.status_code, status.HTTP_200_OK) - self.assertEqual( - [person["id"] for person in response_1_data["results"][0]["people"]], - [str(user_b.uuid)], - ) - - def test_unordered(self): - user_a = _create_person(distinct_ids=["user a"], team=self.team) - user_b = _create_person(distinct_ids=["user b"], team=self.team) - - _create_event( - event="step one", - distinct_id="user a", - team=self.team, - timestamp="2021-06-07 19:00:00", - ) - _create_event( - event="step three", - distinct_id="user a", - team=self.team, - timestamp="2021-06-07 19:00:03", - ) - - _create_event( - event="step one", - distinct_id="user b", - team=self.team, - timestamp="2021-06-07 19:00:00", - ) - _create_event( - event="step three", - distinct_id="user b", - team=self.team, - timestamp="2021-06-07 19:00:01", - ) - _create_event( - event="step two", - distinct_id="user b", - team=self.team, - timestamp="2021-06-07 19:00:02", - ) - - common_request_data = { - "insight": INSIGHT_FUNNELS, - "funnel_viz_type": FunnelVizType.TRENDS, - "interval": "day", - "date_from": "2021-06-07", - "date_to": "2021-06-13 23:59:59", - "funnel_window_days": 7, - "funnel_order_type": FunnelOrderType.UNORDERED, - "events": json.dumps( - [ - {"id": "step one", "order": 0}, - {"id": "step two", "order": 1}, - {"id": "step three", "order": 2}, - ] - ), - "properties": json.dumps([]), - "funnel_window_days": 7, - "new_entity": json.dumps([]), - } - - # 1 user who dropped off - response_1 = self.client.get( - "/api/person/funnel/", - data={ - **common_request_data, - "entrance_period_start": "2021-06-07", - "drop_off": True, - }, - ) - response_1_data = response_1.json() - - self.assertEqual(response_1.status_code, status.HTTP_200_OK) - self.assertEqual( - [person["id"] for person in response_1_data["results"][0]["people"]], 
- [str(user_a.uuid)], - ) - - # 1 user who successfully converted - response_1 = self.client.get( - "/api/person/funnel/", - data={ - **common_request_data, - "entrance_period_start": "2021-06-07", - "drop_off": False, - }, - ) - response_1_data = response_1.json() - - self.assertEqual(response_1.status_code, status.HTTP_200_OK) - self.assertEqual( - [person["id"] for person in response_1_data["results"][0]["people"]], - [str(user_b.uuid)], - ) diff --git a/ee/clickhouse/views/test/funnel/test_clickhouse_funnel_unordered.py b/ee/clickhouse/views/test/funnel/test_clickhouse_funnel_unordered.py deleted file mode 100644 index b5ffad91de..0000000000 --- a/ee/clickhouse/views/test/funnel/test_clickhouse_funnel_unordered.py +++ /dev/null @@ -1,101 +0,0 @@ -import json -from datetime import datetime - -from ee.api.test.base import LicensedTestMixin -from ee.clickhouse.views.test.funnel.util import ( - EventPattern, - FunnelRequest, - get_funnel_ok, -) -from posthog.constants import INSIGHT_FUNNELS -from posthog.models.group.util import create_group -from posthog.models.group_type_mapping import GroupTypeMapping -from posthog.test.base import ( - APIBaseTest, - ClickhouseTestMixin, - snapshot_clickhouse_queries, -) -from posthog.test.test_journeys import journeys_for - - -class ClickhouseTestUnorderedFunnelGroups(ClickhouseTestMixin, LicensedTestMixin, APIBaseTest): - maxDiff = None - CLASS_DATA_LEVEL_SETUP = False - - @snapshot_clickhouse_queries - def test_unordered_funnel_with_groups(self): - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="organization", group_type_index=0 - ) - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="company", group_type_index=1 - ) - - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:5", - properties={"industry": "finance"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:6", - properties={"industry": "technology"}, - ) - - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key="company:1", - properties={}, - ) - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key="company:2", - properties={}, - ) - - events_by_person = { - "user_1": [ - { - "event": "user signed up", - "timestamp": datetime(2020, 1, 3, 14), - "properties": {"$group_0": "org:5"}, - }, - { # same person, different group, so should count as different step 1 in funnel - "event": "user signed up", - "timestamp": datetime(2020, 1, 10, 14), - "properties": {"$group_0": "org:6"}, - }, - ], - "user_2": [ - { # different person, same group, so should count as step two in funnel - "event": "paid", - "timestamp": datetime(2020, 1, 2, 14), - "properties": {"$group_0": "org:5"}, - } - ], - } - journeys_for(events_by_person, self.team) - - params = FunnelRequest( - events=json.dumps( - [ - EventPattern(id="user signed up", type="events", order=0), - EventPattern(id="paid", type="events", order=1), - ] - ), - date_from="2020-01-01", - date_to="2020-01-14", - aggregation_group_type_index=0, - funnel_order_type="unordered", - insight=INSIGHT_FUNNELS, - ) - - result = get_funnel_ok(self.client, self.team.pk, params) - - assert result["Completed 1 step"]["count"] == 2 - assert result["Completed 2 steps"]["count"] == 1 - assert result["Completed 2 steps"]["average_conversion_time"] == 86400 diff --git a/ee/clickhouse/views/test/funnel/util.py b/ee/clickhouse/views/test/funnel/util.py deleted file mode 100644 index 
cd28a74837..0000000000 --- a/ee/clickhouse/views/test/funnel/util.py +++ /dev/null @@ -1,96 +0,0 @@ -import dataclasses -from typing import Any, Literal, Optional, TypedDict, Union - -from django.test.client import Client - -from ee.clickhouse.queries.funnels.funnel_correlation import EventOddsRatioSerialized -from posthog.constants import FunnelCorrelationType -from posthog.models.property import GroupTypeIndex - - -class EventPattern(TypedDict, total=False): - id: str - type: Union[Literal["events"], Literal["actions"]] - order: int - properties: dict[str, Any] - - -@dataclasses.dataclass -class FunnelCorrelationRequest: - # Needs to be a JSON-encoded list of `EventPattern`s - events: str - date_to: str - funnel_step: Optional[int] = None - date_from: Optional[str] = None - funnel_correlation_type: Optional[FunnelCorrelationType] = None - # Needs to be a JSON-encoded list of `str`s - funnel_correlation_names: Optional[str] = None - funnel_correlation_event_names: Optional[str] = None - - -@dataclasses.dataclass -class FunnelRequest: - events: str - date_from: str - insight: str - aggregation_group_type_index: Optional[GroupTypeIndex] = None - date_to: Optional[str] = None - properties: Optional[str] = None - funnel_order_type: Optional[str] = None - - -def get_funnel(client: Client, team_id: int, request: FunnelRequest): - return client.post( - f"/api/projects/{team_id}/insights/funnel", - data={key: value for key, value in dataclasses.asdict(request).items() if value is not None}, - ) - - -def get_funnel_ok(client: Client, team_id: int, request: FunnelRequest) -> dict[str, Any]: - response = get_funnel(client=client, team_id=team_id, request=request) - - assert response.status_code == 200, response.content - res = response.json() - final = {} - - for step in res["result"]: - final[step["name"]] = step - - return final - - -def get_funnel_correlation(client: Client, team_id: int, request: FunnelCorrelationRequest): - return client.get( - f"/api/projects/{team_id}/insights/funnel/correlation", - data={key: value for key, value in dataclasses.asdict(request).items() if value is not None}, - ) - - -def get_funnel_correlation_ok(client: Client, team_id: int, request: FunnelCorrelationRequest) -> dict[str, Any]: - response = get_funnel_correlation(client=client, team_id=team_id, request=request) - - assert response.status_code == 200, response.content - return response.json() - - -def get_people_for_correlation_ok(client: Client, correlation: EventOddsRatioSerialized) -> dict[str, Any]: - """ - Helper for getting people for a correlation. Note we only check for - inclusion of name, to keep this stable against changes in other person props. 
- """ - success_people_url = correlation["success_people_url"] - failure_people_url = correlation["failure_people_url"] - - if not success_people_url or not failure_people_url: - return {} - - success_people_response = client.get(success_people_url) - assert success_people_response.status_code == 200, success_people_response.content - - failure_people_response = client.get(failure_people_url) - assert failure_people_response.status_code == 200, failure_people_response.content - - return { - "success": sorted([person["name"] for person in success_people_response.json()["results"][0]["people"]]), - "failure": sorted([person["name"] for person in failure_people_response.json()["results"][0]["people"]]), - } diff --git a/ee/clickhouse/views/test/test_clickhouse_experiment_secondary_results.py b/ee/clickhouse/views/test/test_clickhouse_experiment_secondary_results.py deleted file mode 100644 index d2af178a77..0000000000 --- a/ee/clickhouse/views/test/test_clickhouse_experiment_secondary_results.py +++ /dev/null @@ -1,1317 +0,0 @@ -from typing import Any -from flaky import flaky - - -from ee.api.test.base import APILicensedTest -from posthog.models.signals import mute_selected_signals -from posthog.test.base import ClickhouseTestMixin, snapshot_clickhouse_queries -from posthog.test.test_journeys import journeys_for - -DEFAULT_JOURNEYS_FOR_PAYLOAD: dict[str, list[dict[str, Any]]] = { - # For a trend pageview metric - "person1": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - } - ], - "person2": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - "person3": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - } - ], - # doesn't have feature set - "person_out_of_control": [{"event": "$pageview", "timestamp": "2020-01-03"}], - "person_out_of_end_date": [ - { - "event": "$pageview", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - } - ], - # wrong feature set somehow - "person_out_of_feature_control": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "ablahebf"}, - } - ], - # for a funnel conversion metric - "person1_funnel": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageleave_funnel", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test"}, - }, - ], - "person2_funnel": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave_funnel", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - "person3_funnel": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave_funnel", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - # doesn't have feature set - "person_out_of_control_funnel": [ - {"event": "$pageview_funnel", "timestamp": "2020-01-03"}, - {"event": "$pageleave_funnel", "timestamp": "2020-01-05"}, - ], - "person_out_of_end_date_funnel": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - 
"event": "$pageleave_funnel", - "timestamp": "2020-08-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - # non-converters with FF - "person4_funnel": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "test"}, - } - ], - "person5_funnel": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test"}, - } - ], -} - -DEFAULT_EXPERIMENT_CREATION_PAYLOAD = { - "name": "Test Experiment", - "description": "", - "start_date": "2020-01-01T00:00", - "end_date": "2020-01-06T00:00", - "feature_flag_key": "a-b-test", - "parameters": {}, - "secondary_metrics": [ - { - "name": "trends whatever", - "filters": { - "insight": "trends", - "events": [{"order": 0, "id": "$pageview"}], - "properties": [ - { - "key": "$geoip_country_name", - "type": "person", - "value": ["france"], - "operator": "exact", - } - # properties superceded by FF breakdown - ], - }, - }, - { - "name": "funnels whatever", - "filters": { - "insight": "funnels", - "events": [ - {"order": 0, "id": "$pageview_funnel"}, - {"order": 1, "id": "$pageleave_funnel"}, - ], - "properties": [ - { - "key": "$geoip_country_name", - "type": "person", - "value": ["france"], - "operator": "exact", - } - # properties superceded by FF breakdown - ], - }, - }, - ], - # target metric insignificant since we're testing secondaries right now - "filters": {"insight": "trends", "events": [{"order": 0, "id": "whatever"}]}, -} - - -@flaky(max_runs=10, min_passes=1) -class ClickhouseTestExperimentSecondaryResults(ClickhouseTestMixin, APILicensedTest): - @snapshot_clickhouse_queries - def test_basic_secondary_metric_results(self): - journeys_for( - DEFAULT_JOURNEYS_FOR_PAYLOAD, - self.team, - ) - - # :KLUDGE: Avoid calling sync_insight_caching_state which messes with snapshots - with mute_selected_signals(): - # generates the FF which should result in the above events^ - creation_response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - DEFAULT_EXPERIMENT_CREATION_PAYLOAD, - ) - - id = creation_response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/secondary_results?id=0") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - - self.assertEqual(len(response_data["result"].items()), 2) - - self.assertEqual(response_data["result"]["control"], 3) - self.assertEqual(response_data["result"]["test"], 1) - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/secondary_results?id=1") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - - self.assertEqual(len(response_data["result"].items()), 2) - - self.assertAlmostEqual(response_data["result"]["control"], 1) - self.assertEqual(response_data["result"]["test"], round(1 / 3, 3)) - - def test_basic_secondary_metric_results_cached(self): - journeys_for( - DEFAULT_JOURNEYS_FOR_PAYLOAD, - self.team, - ) - - # generates the FF which should result in the above events^ - creation_response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - DEFAULT_EXPERIMENT_CREATION_PAYLOAD, - ) - - id = creation_response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/secondary_results?id=0") - self.assertEqual(200, response.status_code) - - response_data = response.json() - self.assertEqual(response_data.pop("is_cached"), False) - - response_data = response_data["result"] - 
self.assertEqual(len(response_data["result"].items()), 2) - - self.assertEqual(response_data["result"]["control"], 3) - self.assertEqual(response_data["result"]["test"], 1) - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/secondary_results?id=1") - self.assertEqual(200, response.status_code) - - response_data = response.json() - result_data = response_data["result"] - - self.assertEqual(len(result_data["result"].items()), 2) - - self.assertAlmostEqual(result_data["result"]["control"], 1) - self.assertEqual(result_data["result"]["test"], round(1 / 3, 3)) - - response2 = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/secondary_results?id=1") - response2_data = response2.json() - - self.assertEqual(response2_data.pop("is_cached"), True) - self.assertEqual(response2_data["result"], response_data["result"]) - - def test_secondary_metric_results_for_multiple_variants(self): - journeys_for( - { - # trend metric first - "person1_2_trend": [ - { - "event": "$pageview_trend", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_2"}, - } - ], - "person1_1_trend": [ - { - "event": "$pageview_trend", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - } - ], - "person2_1_trend": [ - { - "event": "$pageview_trend", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - } - ], - "person2_trend": [ - { - "event": "$pageview_trend", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - } - ], - "person3_trend": [ - { - "event": "$pageview_trend", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - } - ], - "person4_trend": [ - { - "event": "$pageview_trend", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - } - ], - # doesn't have feature set - "person_out_of_control": [{"event": "$pageview_trend", "timestamp": "2020-01-03"}], - "person_out_of_end_date": [ - { - "event": "$pageview_trend", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - } - ], - # funnel metric second - "person1_2": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_2"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test_2"}, - }, - ], - "person1_1": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test_1"}, - }, - ], - "person2_1": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test_1"}, - }, - ], - "person1": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test"}, - }, - ], - "person2": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - "person3": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-05", - "properties": 
{"$feature/a-b-test": "control"}, - }, - ], - # doesn't have feature set - "person_out_of_control": [ - {"event": "$pageview", "timestamp": "2020-01-03"}, - {"event": "$pageleave", "timestamp": "2020-01-05"}, - ], - "person_out_of_end_date": [ - { - "event": "$pageview", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-08-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - # non-converters with FF - "person4": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "test"}, - } - ], - "person5": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test"}, - } - ], - "person6_1": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - } - ], - }, - self.team, - ) - - ff_key = "a-b-test" - # generates the FF which should result in the above events^ - creation_response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2020-01-01T00:00", - "end_date": "2020-01-06T00:00", - "feature_flag_key": ff_key, - "parameters": { - "feature_flag_variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 25, - }, - { - "key": "test_1", - "name": "Test Variant 1", - "rollout_percentage": 25, - }, - { - "key": "test_2", - "name": "Test Variant 2", - "rollout_percentage": 25, - }, - { - "key": "test", - "name": "Test Variant 3", - "rollout_percentage": 25, - }, - ] - }, - "secondary_metrics": [ - { - "name": "secondary metric", - "filters": { - "insight": "trends", - "events": [{"order": 0, "id": "$pageview_trend"}], - }, - }, - { - "name": "funnel metric", - "filters": { - "insight": "funnels", - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - }, - }, - ], - # target metric insignificant since we're testing secondaries right now - "filters": { - "insight": "trends", - "events": [{"order": 0, "id": "whatever"}], - }, - }, - ) - - id = creation_response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/secondary_results?id=0") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - - # trend missing 'test' variant, so it's not in the results - self.assertEqual(len(response_data["result"].items()), 3) - - self.assertEqual(response_data["result"]["control"], 3) - self.assertEqual(response_data["result"]["test_1"], 2) - self.assertEqual(response_data["result"]["test_2"], 1) - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/secondary_results?id=1") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - - # funnel not missing 'test' variant, so it's in the results - self.assertEqual(len(response_data["result"].items()), 4) - - self.assertAlmostEqual(response_data["result"]["control"], 1) - self.assertAlmostEqual(response_data["result"]["test"], round(1 / 3, 3)) - self.assertAlmostEqual(response_data["result"]["test_1"], round(2 / 3, 3)) - self.assertAlmostEqual(response_data["result"]["test_2"], 1) - - def test_secondary_metric_results_for_multiple_variants_with_trend_count_per_actor(self): - journeys_for( - { - # trend metric first - "person1_2_trend": [ - { - "event": "$pageview_trend", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_2"}, - } - ], - 
"person1_1_trend": [ - { - "event": "$pageview_trend", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - } - ], - "person2_1_trend": [ - { - "event": "$pageview_trend", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - } - ], - "person2_trend": [ - { - "event": "$pageview_trend", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - } - ], - "person3_trend": [ - { - "event": "$pageview_trend", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - } - ], - "person4_trend": [ - { - "event": "$pageview_trend", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - } - ], - # doesn't have feature set - "person_out_of_control": [{"event": "$pageview_trend", "timestamp": "2020-01-03"}], - "person_out_of_end_date": [ - { - "event": "$pageview_trend", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - } - ], - # avg count per user metric second - "person1_2": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_2"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_2"}, - }, - ], - "person1_1": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - }, - ], - "person2_1": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "test_1"}, - }, - ], - "person1": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - ], - "person2": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - "person3": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - # doesn't have feature set - "person_out_of_control": [ - {"event": "$pageview", "timestamp": "2020-01-03"}, - {"event": "$pageleave", "timestamp": "2020-01-05"}, - ], - "person_out_of_end_date": [ - { - "event": "$pageview", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-08-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - # non-converters with FF - "person4": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "test"}, - } - ], - "person5": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test"}, - } - ], - "person6_1": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - } - ], - }, - self.team, - ) - - ff_key = "a-b-test" - # generates the FF which should result in the above events^ - creation_response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", 
- "description": "", - "start_date": "2020-01-01T00:00", - "end_date": "2020-01-06T00:00", - "feature_flag_key": ff_key, - "parameters": { - "feature_flag_variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 25, - }, - { - "key": "test_1", - "name": "Test Variant 1", - "rollout_percentage": 25, - }, - { - "key": "test_2", - "name": "Test Variant 2", - "rollout_percentage": 25, - }, - { - "key": "test", - "name": "Test Variant 3", - "rollout_percentage": 25, - }, - ] - }, - "secondary_metrics": [ - { - "name": "secondary metric", - "filters": { - "insight": "trends", - "events": [{"order": 0, "id": "$pageview_trend"}], - }, - }, - { - "name": "funnel metric", - "filters": { - "insight": "trends", - "events": [ - { - "order": 0, - "id": "$pageview", - "math": "avg_count_per_actor", - } - ], - }, - }, - ], - # target metric insignificant since we're testing secondaries right now - "filters": { - "insight": "trends", - "events": [{"order": 0, "id": "whatever"}], - }, - }, - ) - - id = creation_response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/secondary_results?id=0") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - - # trend missing 'test' variant, so it's not in the results - self.assertEqual(len(response_data["result"].items()), 3) - - self.assertEqual(response_data["result"]["control"], 3) - self.assertEqual(response_data["result"]["test_1"], 2) - self.assertEqual(response_data["result"]["test_2"], 1) - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/secondary_results?id=1") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - - # funnel not missing 'test' variant, so it's in the results - self.assertEqual(len(response_data["result"].items()), 4) - - self.assertAlmostEqual(response_data["result"]["control"], round(3.5 / 6, 3), 3) - self.assertAlmostEqual(response_data["result"]["test"], 0.5) - self.assertAlmostEqual(response_data["result"]["test_1"], 0.5) - self.assertAlmostEqual(response_data["result"]["test_2"], round(1 / 3, 3), 3) - - def test_secondary_metric_results_for_multiple_variants_with_trend_count_per_property_value(self): - journeys_for( - { - # trend metric first - "person1_2_trend": [ - { - "event": "$pageview_trend", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_2"}, - } - ], - "person1_1_trend": [ - { - "event": "$pageview_trend", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - } - ], - "person2_1_trend": [ - { - "event": "$pageview_trend", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - } - ], - "person2_trend": [ - { - "event": "$pageview_trend", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - } - ], - "person3_trend": [ - { - "event": "$pageview_trend", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - } - ], - "person4_trend": [ - { - "event": "$pageview_trend", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - } - ], - # doesn't have feature set - "person_out_of_control": [{"event": "$pageview_trend", "timestamp": "2020-01-03"}], - "person_out_of_end_date": [ - { - "event": "$pageview_trend", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - } - ], - # avg per mathable property second - "person1_2": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - 
"properties": {"$feature/a-b-test": "test_2", "mathable": 1}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_2", "mathable": 2}, - }, - ], - "person1_1": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1", "mathable": 1}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1", "mathable": 2}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1", "mathable": 3}, - }, - ], - "person2_1": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "test_1", "mathable": 10}, - }, - ], - "person1": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test", "mathable": 200}, - }, - ], - "person2": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - "person3": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - # doesn't have feature set - "person_out_of_control": [ - {"event": "$pageview", "timestamp": "2020-01-03"}, - {"event": "$pageleave", "timestamp": "2020-01-05"}, - ], - "person_out_of_end_date": [ - { - "event": "$pageview", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-08-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - }, - self.team, - ) - - ff_key = "a-b-test" - # generates the FF which should result in the above events^ - creation_response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2020-01-01T00:00", - "end_date": "2020-01-06T00:00", - "feature_flag_key": ff_key, - "parameters": { - "feature_flag_variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 25, - }, - { - "key": "test_1", - "name": "Test Variant 1", - "rollout_percentage": 25, - }, - { - "key": "test_2", - "name": "Test Variant 2", - "rollout_percentage": 25, - }, - { - "key": "test", - "name": "Test Variant 3", - "rollout_percentage": 25, - }, - ] - }, - "secondary_metrics": [ - { - "name": "secondary metric", - "filters": { - "insight": "trends", - "events": [{"order": 0, "id": "$pageview_trend"}], - }, - }, - { - "name": "funnel metric", - "filters": { - "insight": "trends", - "events": [ - { - "order": 0, - "id": "$pageview", - "math": "avg", - "math_property": "mathable", - } - ], - }, - }, - ], - # target metric insignificant since we're testing secondaries right now - "filters": { - "insight": "trends", - "events": [{"order": 0, "id": "whatever"}], - }, - }, - ) - - id = creation_response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/secondary_results?id=0") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - - # trend missing 'test' variant, so it's not in the results - self.assertEqual(len(response_data["result"].items()), 3) - - 
self.assertEqual(response_data["result"]["control"], 3) - self.assertEqual(response_data["result"]["test_1"], 2) - self.assertEqual(response_data["result"]["test_2"], 1) - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/secondary_results?id=1") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - - self.assertEqual(len(response_data["result"].items()), 4) - - self.assertAlmostEqual(response_data["result"]["control"], 0, 3) - self.assertAlmostEqual(response_data["result"]["test"], 33.3333, 3) - self.assertAlmostEqual(response_data["result"]["test_1"], 2, 3) - self.assertAlmostEqual(response_data["result"]["test_2"], 0.25, 3) - - def test_metrics_without_full_flag_information_are_valid(self): - journeys_for( - { - # doesn't have feature set - "person_out_of_control": [{"event": "$pageview_funnel", "timestamp": "2020-01-03"}], - "person_out_of_end_date": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - } - ], - # has invalid feature set - "person_out_of_all_controls": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "XYZABC"}, - } - ], - # for a funnel conversion metric - "person1_funnel": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-01-02", - # "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageleave_funnel", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test"}, - }, - ], - "person2_funnel": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave_funnel", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - "person3_funnel": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave_funnel", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - # doesn't have feature set - "person_out_of_control_funnel": [ - {"event": "$pageview_funnel", "timestamp": "2020-01-03"}, - {"event": "$pageleave_funnel", "timestamp": "2020-01-05"}, - ], - "person_out_of_end_date_funnel": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave_funnel", - "timestamp": "2020-08-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - # non-converters with FF - "person4_funnel": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "test"}, - } - ], - "person5_funnel": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test"}, - } - ], - }, - self.team, - ) - - ff_key = "a-b-test" - # generates the FF which should result in the above events^ - creation_response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2020-01-01T00:00", - "end_date": "2020-01-06T00:00", - "feature_flag_key": ff_key, - "parameters": {}, - "secondary_metrics": [ - { - "name": "funnels whatever", - "filters": { - "insight": "funnels", - "events": [ - {"order": 0, "id": "$pageview_funnel"}, - {"order": 1, "id": "$pageleave_funnel"}, - ], - "properties": [ - { - "key": "$geoip_country_name", - "type": "person", - "value": ["france"], - "operator": "exact", - } - # 
properties superceded by FF breakdown - ], - }, - }, - ], - # target metric insignificant since we're testing secondaries right now - "filters": { - "insight": "trends", - "events": [{"order": 0, "id": "whatever"}], - }, - }, - ) - - id = creation_response.json()["id"] - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/secondary_results?id=0") - self.assertEqual(200, response.status_code) - - response_data = response.json() - result_data = response_data["result"] - - self.assertEqual(len(result_data["result"].items()), 2) - self.assertAlmostEqual(result_data["result"]["control"], 1) - self.assertEqual(result_data["result"]["test"], 0.333) - - self.assertEqual( - set(response_data["result"].keys()), - { - "result", - "insight", - "filters", - "probability", - "significant", - "significance_code", - "expected_loss", - "credible_intervals", - "variants", - }, - ) - - self.assertEqual( - response_data["result"]["variants"], - [ - { - "failure_count": 0, - "key": "control", - "success_count": 2, - }, - { - "failure_count": 2, - "key": "test", - "success_count": 1, - }, - ], - ) - - self.assertFalse(response_data["result"]["significant"]) - self.assertEqual(response_data["result"]["significance_code"], "not_enough_exposure") - - def test_no_metric_validation_errors_for_secondary_metrics(self): - journeys_for( - { - # for trend metric, no test - "person2": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - # doesn't have feature set - "person_out_of_control": [{"event": "$pageview_funnel", "timestamp": "2020-01-03"}], - "person_out_of_end_date": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - } - ], - # has invalid feature set - "person_out_of_all_controls": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "XYZABC"}, - } - ], - # for a funnel conversion metric - no control variant - "person1_funnel": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-01-02", - # "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageleave_funnel", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test"}, - }, - ], - # doesn't have feature set - "person_out_of_control_funnel": [ - {"event": "$pageview_funnel", "timestamp": "2020-01-03"}, - {"event": "$pageleave_funnel", "timestamp": "2020-01-05"}, - ], - "person_out_of_end_date_funnel": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave_funnel", - "timestamp": "2020-08-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - # non-converters with FF - "person4_funnel": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "test"}, - } - ], - "person5_funnel": [ - { - "event": "$pageview_funnel", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test"}, - } - ], - }, - self.team, - ) - - creation_response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - DEFAULT_EXPERIMENT_CREATION_PAYLOAD, - ) - - id = creation_response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/secondary_results?id=0") - self.assertEqual(200, response.status_code) - - response_data = 
response.json() - result_data = response_data["result"] - - assert set(response_data["result"].keys()) == { - "result", - "insight", - "filters", - "exposure_filters", - } - - self.assertEqual(result_data["result"]["control"], 2) - assert "test" not in result_data["result"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/secondary_results?id=1") - self.assertEqual(200, response.status_code) - - response_data = response.json() - result_data = response_data["result"] - - self.assertEqual(len(response_data["result"].items()), 3) - - assert set(response_data["result"].keys()) == { - "result", - "insight", - "filters", - } - - assert "control" not in result_data["result"] - - self.assertEqual(result_data["result"]["test"], 0.333) diff --git a/ee/clickhouse/views/test/test_clickhouse_experiments.py b/ee/clickhouse/views/test/test_clickhouse_experiments.py deleted file mode 100644 index 308dbdc207..0000000000 --- a/ee/clickhouse/views/test/test_clickhouse_experiments.py +++ /dev/null @@ -1,4926 +0,0 @@ -from datetime import datetime, timedelta, UTC -from django.core.cache import cache -from flaky import flaky -from rest_framework import status - -from ee.api.test.base import APILicensedTest -from dateutil import parser - -from posthog.models import WebExperiment -from posthog.models.action.action import Action -from posthog.models.cohort.cohort import Cohort -from posthog.models.experiment import Experiment -from posthog.models.feature_flag import FeatureFlag, get_feature_flags_for_team_in_cache -from posthog.schema import ExperimentSignificanceCode -from posthog.test.base import ( - ClickhouseTestMixin, - _create_event, - _create_person, - flush_persons_and_events, - snapshot_clickhouse_insert_cohortpeople_queries, - snapshot_clickhouse_queries, - FuzzyInt, -) -from posthog.test.test_journeys import journeys_for - - -class TestExperimentCRUD(APILicensedTest): - # List experiments - def test_can_list_experiments(self): - response = self.client.get(f"/api/projects/{self.team.id}/experiments/") - self.assertEqual(response.status_code, status.HTTP_200_OK) - - def test_getting_experiments_is_not_nplus1(self) -> None: - self.client.post( - f"/api/projects/{self.team.id}/experiments/", - data={ - "name": "Test Experiment", - "feature_flag_key": f"flag_0", - "filters": {"events": [{"order": 0, "id": "$pageview"}]}, - "start_date": "2021-12-01T10:23", - "parameters": None, - }, - format="json", - ).json() - - self.client.post( - f"/api/projects/{self.team.id}/experiments/", - data={ - "name": "Test Experiment", - "feature_flag_key": f"exp_flag_000", - "filters": {"events": [{"order": 0, "id": "$pageview"}]}, - "start_date": "2021-12-01T10:23", - "end_date": "2021-12-01T10:23", - "archived": True, - "parameters": None, - }, - format="json", - ).json() - - with self.assertNumQueries(FuzzyInt(13, 14)): - response = self.client.get(f"/api/projects/{self.team.id}/experiments") - self.assertEqual(response.status_code, status.HTTP_200_OK) - - for i in range(1, 5): - self.client.post( - f"/api/projects/{self.team.id}/experiments/", - data={ - "name": "Test Experiment", - "feature_flag_key": f"flag_{i}", - "filters": {"events": [{"order": 0, "id": "$pageview"}]}, - "start_date": "2021-12-01T10:23", - "parameters": None, - }, - format="json", - ).json() - - with self.assertNumQueries(FuzzyInt(13, 14)): - response = self.client.get(f"/api/projects/{self.team.id}/experiments") - self.assertEqual(response.status_code, status.HTTP_200_OK) - - def 
test_creating_updating_basic_experiment(self): - ff_key = "a-b-tests" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2021-12-01T10:23", - "end_date": None, - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment") - self.assertEqual(response.json()["feature_flag_key"], ff_key) - self.assertEqual(response.json()["stats_config"], {"version": 2}) - - id = response.json()["id"] - experiment = Experiment.objects.get(pk=id) - self.assertEqual(experiment.get_stats_config("version"), 2) - - created_ff = FeatureFlag.objects.get(key=ff_key) - - self.assertEqual(created_ff.key, ff_key) - self.assertEqual(created_ff.filters["multivariate"]["variants"][0]["key"], "control") - self.assertEqual(created_ff.filters["multivariate"]["variants"][1]["key"], "test") - self.assertEqual(created_ff.filters["groups"][0]["properties"], []) - - end_date = "2021-12-10T00:00" - - # Now update - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{id}", - {"description": "Bazinga", "end_date": end_date, "stats_config": {"version": 1}}, - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - - experiment = Experiment.objects.get(pk=id) - self.assertEqual(experiment.description, "Bazinga") - self.assertEqual(experiment.end_date.strftime("%Y-%m-%dT%H:%M"), end_date) - self.assertEqual(experiment.get_stats_config("version"), 1) - - def test_creating_updating_web_experiment(self): - ff_key = "a-b-tests" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "type": "web", - "description": "", - "start_date": "2021-12-01T10:23", - "end_date": None, - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment") - self.assertEqual(response.json()["feature_flag_key"], ff_key) - web_experiment_id = response.json()["id"] - self.assertEqual( - WebExperiment.objects.get(pk=web_experiment_id).variants, - {"test": {"rollout_percentage": 50}, "control": {"rollout_percentage": 50}}, - ) - - created_ff = FeatureFlag.objects.get(key=ff_key) - - self.assertEqual(created_ff.key, ff_key) - self.assertEqual(created_ff.filters["multivariate"]["variants"][0]["key"], "control") - self.assertEqual(created_ff.filters["multivariate"]["variants"][1]["key"], "test") - self.assertEqual(created_ff.filters["groups"][0]["properties"], []) - - id = response.json()["id"] - end_date = "2021-12-10T00:00" - - # Now update - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{id}", - {"description": "Bazinga", "end_date": end_date}, - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - - experiment = Experiment.objects.get(pk=id) - self.assertEqual(experiment.description, "Bazinga") - self.assertEqual(experiment.end_date.strftime("%Y-%m-%dT%H:%M"), end_date) - - def test_transferring_holdout_to_another_group(self): - response = self.client.post( - f"/api/projects/{self.team.id}/experiment_holdouts/", - data={ - 
"name": "Test Experiment holdout", - "filters": [ - { - "properties": [], - "rollout_percentage": 20, - "variant": "holdout", - } - ], - }, - format="json", - ) - - holdout_id = response.json()["id"] - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment holdout") - self.assertEqual( - response.json()["filters"], - [{"properties": [], "rollout_percentage": 20, "variant": f"holdout-{holdout_id}"}], - ) - - # Generate draft experiment to be part of holdout - ff_key = "a-b-tests" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": None, - "end_date": None, - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - "holdout_id": holdout_id, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment") - self.assertEqual(response.json()["feature_flag_key"], ff_key) - - created_ff = FeatureFlag.objects.get(key=ff_key) - - self.assertEqual(created_ff.key, ff_key) - self.assertEqual( - created_ff.filters["holdout_groups"], - [{"properties": [], "rollout_percentage": 20, "variant": f"holdout-{holdout_id}"}], - ) - - exp_id = response.json()["id"] - - # new holdout, and update experiment - response = self.client.post( - f"/api/projects/{self.team.id}/experiment_holdouts/", - data={ - "name": "Test Experiment holdout 2", - "filters": [ - { - "properties": [], - "rollout_percentage": 5, - "variant": "holdout", - } - ], - }, - format="json", - ) - holdout_2_id = response.json()["id"] - - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{exp_id}", - {"holdout_id": holdout_2_id}, - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - - experiment = Experiment.objects.get(pk=exp_id) - self.assertEqual(experiment.holdout_id, holdout_2_id) - - created_ff = FeatureFlag.objects.get(key=ff_key) - self.assertEqual( - created_ff.filters["holdout_groups"], - [{"properties": [], "rollout_percentage": 5, "variant": f"holdout-{holdout_2_id}"}], - ) - - # update parameters - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{exp_id}", - { - "parameters": { - "feature_flag_variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 33, - }, - { - "key": "test_1", - "name": "Test Variant", - "rollout_percentage": 33, - }, - { - "key": "test_2", - "name": "Test Variant", - "rollout_percentage": 34, - }, - ] - }, - }, - ) - - experiment = Experiment.objects.get(pk=exp_id) - self.assertEqual(experiment.holdout_id, holdout_2_id) - - created_ff = FeatureFlag.objects.get(key=ff_key) - self.assertEqual( - created_ff.filters["holdout_groups"], - [{"properties": [], "rollout_percentage": 5, "variant": f"holdout-{holdout_2_id}"}], - ) - self.assertEqual( - created_ff.filters["multivariate"]["variants"], - [ - {"key": "control", "name": "Control Group", "rollout_percentage": 33}, - {"key": "test_1", "name": "Test Variant", "rollout_percentage": 33}, - {"key": "test_2", "name": "Test Variant", "rollout_percentage": 34}, - ], - ) - - # remove holdouts - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{exp_id}", - {"holdout_id": None}, - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - - experiment = 
Experiment.objects.get(pk=exp_id) - self.assertEqual(experiment.holdout_id, None) - - created_ff = FeatureFlag.objects.get(key=ff_key) - self.assertEqual(created_ff.filters["holdout_groups"], None) - - # try adding invalid holdout - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{exp_id}", - {"holdout_id": 123456}, - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["detail"], 'Invalid pk "123456" - object does not exist.') - - # add back holdout - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{exp_id}", - {"holdout_id": holdout_2_id}, - ) - - # launch experiment and try updating holdouts again - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{exp_id}", - {"start_date": "2021-12-01T10:23"}, - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{exp_id}", - {"holdout_id": holdout_id}, - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["detail"], "Can't update holdout on running Experiment") - - created_ff = FeatureFlag.objects.get(key=ff_key) - self.assertEqual( - created_ff.filters["holdout_groups"], - [{"properties": [], "rollout_percentage": 5, "variant": f"holdout-{holdout_2_id}"}], - ) - - def test_saved_metrics(self): - response = self.client.post( - f"/api/projects/{self.team.id}/experiment_saved_metrics/", - { - "name": "Test Experiment saved metric", - "description": "Test description", - "query": { - "kind": "ExperimentTrendsQuery", - "count_query": { - "kind": "TrendsQuery", - "series": [{"kind": "EventsNode", "event": "$pageview"}], - }, - }, - }, - ) - - saved_metric_id = response.json()["id"] - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment saved metric") - self.assertEqual(response.json()["description"], "Test description") - self.assertEqual( - response.json()["query"], - { - "kind": "ExperimentTrendsQuery", - "count_query": {"kind": "TrendsQuery", "series": [{"kind": "EventsNode", "event": "$pageview"}]}, - }, - ) - self.assertEqual(response.json()["created_by"]["id"], self.user.pk) - - # Generate experiment to have saved metric - ff_key = "a-b-tests" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2021-12-01T10:23", - "end_date": None, - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - "saved_metrics_ids": [{"id": saved_metric_id, "metadata": {"type": "secondary"}}], - }, - ) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - - exp_id = response.json()["id"] - - self.assertEqual(response.json()["name"], "Test Experiment") - self.assertEqual(response.json()["feature_flag_key"], ff_key) - - self.assertEqual(Experiment.objects.get(pk=exp_id).saved_metrics.count(), 1) - experiment_to_saved_metric = Experiment.objects.get(pk=exp_id).experimenttosavedmetric_set.first() - self.assertEqual(experiment_to_saved_metric.metadata, {"type": "secondary"}) - saved_metric = Experiment.objects.get(pk=exp_id).saved_metrics.first() - self.assertEqual(saved_metric.id, saved_metric_id) - self.assertEqual( - saved_metric.query, - { - "kind": "ExperimentTrendsQuery", - "count_query": 
{"kind": "TrendsQuery", "series": [{"kind": "EventsNode", "event": "$pageview"}]}, - }, - ) - - # Now try updating experiment with new saved metric - response = self.client.post( - f"/api/projects/{self.team.id}/experiment_saved_metrics/", - { - "name": "Test Experiment saved metric 2", - "description": "Test description 2", - "query": { - "kind": "ExperimentTrendsQuery", - "count_query": {"kind": "TrendsQuery", "series": [{"kind": "EventsNode", "event": "$pageleave"}]}, - }, - }, - ) - - saved_metric_2_id = response.json()["id"] - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment saved metric 2") - - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{exp_id}", - { - "saved_metrics_ids": [ - {"id": saved_metric_id, "metadata": {"type": "secondary"}}, - {"id": saved_metric_2_id, "metadata": {"type": "tertiary"}}, - ] - }, - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - - self.assertEqual(Experiment.objects.get(pk=exp_id).saved_metrics.count(), 2) - experiment_to_saved_metric = Experiment.objects.get(pk=exp_id).experimenttosavedmetric_set.all() - self.assertEqual(experiment_to_saved_metric[0].metadata, {"type": "secondary"}) - self.assertEqual(experiment_to_saved_metric[1].metadata, {"type": "tertiary"}) - saved_metric = Experiment.objects.get(pk=exp_id).saved_metrics.all() - self.assertEqual(sorted([saved_metric[0].id, saved_metric[1].id]), [saved_metric_id, saved_metric_2_id]) - - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{exp_id}", - {"saved_metrics_ids": []}, - ) - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(Experiment.objects.get(pk=exp_id).saved_metrics.count(), 0) - - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{exp_id}", - { - "saved_metrics_ids": [ - {"id": saved_metric_id, "metadata": {"type": "secondary"}}, - ] - }, - ) - - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{exp_id}", - {"saved_metrics_ids": None}, - ) - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(Experiment.objects.get(pk=exp_id).saved_metrics.count(), 0) - - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{exp_id}", - { - "saved_metrics_ids": [ - {"id": saved_metric_id, "metadata": {"type": "secondary"}}, - ] - }, - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(Experiment.objects.get(pk=exp_id).saved_metrics.count(), 1) - - # not updating saved metrics shouldn't change anything - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{exp_id}", - { - "name": "Test Experiment 2", - }, - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(Experiment.objects.get(pk=exp_id).saved_metrics.count(), 1) - - # now delete saved metric - response = self.client.delete(f"/api/projects/{self.team.id}/experiment_saved_metrics/{saved_metric_id}") - self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) - - # make sure experiment in question was updated as well - self.assertEqual(Experiment.objects.get(pk=exp_id).saved_metrics.count(), 0) - - def test_validate_saved_metrics_payload(self): - response = self.client.post( - f"/api/projects/{self.team.id}/experiment_saved_metrics/", - { - "name": "Test Experiment saved metric", - "description": "Test description", - "query": { - "kind": "ExperimentTrendsQuery", - "count_query": 
{"kind": "TrendsQuery", "series": [{"kind": "EventsNode", "event": "$pageview"}]}, - }, - }, - ) - - saved_metric_id = response.json()["id"] - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - - # Generate experiment to have saved metric - ff_key = "a-b-tests" - exp_data = { - "name": "Test Experiment", - "description": "", - "start_date": "2021-12-01T10:23", - "end_date": None, - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - } - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - **exp_data, - "saved_metrics_ids": [{"id": saved_metric_id, "metadata": {"xxx": "secondary"}}], - }, - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["type"], "validation_error") - self.assertEqual( - response.json()["detail"], - "Metadata must have a type key", - ) - - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - **exp_data, - "saved_metrics_ids": [{"saved_metric": saved_metric_id}], - }, - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["type"], "validation_error") - self.assertEqual(response.json()["detail"], "Saved metric must have an id") - - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - **exp_data, - "saved_metrics_ids": [{"id": 12345678}], - }, - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["type"], "validation_error") - self.assertEqual(response.json()["detail"], "Saved metric does not exist") - - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - **exp_data, - "saved_metrics_ids": {"id": saved_metric_id}, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["type"], "validation_error") - self.assertEqual(response.json()["detail"], 'Expected a list of items but got type "dict".') - - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - **exp_data, - "saved_metrics_ids": [[saved_metric_id]], - }, - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["type"], "validation_error") - self.assertEqual(response.json()["detail"], "Saved metric must be an object") - - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - **exp_data, - "saved_metrics_ids": [{"id": saved_metric_id, "metadata": "secondary"}], - }, - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["type"], "validation_error") - self.assertEqual(response.json()["detail"], "Metadata must be an object") - - def test_adding_behavioral_cohort_filter_to_experiment_fails(self): - cohort = Cohort.objects.create( - team=self.team, - filters={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "event_type": "events", - "time_value": 2, - "time_interval": "week", - "value": "performed_event_first_time", - "type": "behavioral", - }, - ], - } - }, - name="cohort_behavioral", - ) - ff_key = "a-b-tests" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2021-12-01T10:23", - "end_date": None, - "feature_flag_key": ff_key, - "parameters": 
None, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - - id = response.json()["id"] - - # Now update - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{id}", - {"filters": {"properties": [{"key": "id", "value": cohort.pk, "type": "cohort"}]}}, - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["type"], "validation_error") - self.assertEqual( - response.json()["detail"], - "Experiments do not support global filter properties", - ) - - def test_invalid_create(self): - # Draft experiment - ff_key = "a-b-tests" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": None, # invalid - "description": "", - "start_date": None, - "end_date": None, - "feature_flag_key": ff_key, - "parameters": {}, - "filters": {}, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["detail"], "This field may not be null.") - - def test_invalid_update(self): - # Draft experiment - ff_key = "a-b-tests" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": None, - "end_date": None, - "feature_flag_key": ff_key, - "parameters": {}, - "filters": {"events": []}, - }, - ) - - id = response.json()["id"] - - # Now update - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{id}", - { - "description": "Bazinga", - "filters": {}, - "feature_flag_key": "new_key", - }, # invalid - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual( - response.json()["detail"], - "Can't update keys: get_feature_flag_key on Experiment", - ) - - def test_cant_reuse_existing_feature_flag(self): - ff_key = "a-b-test" - FeatureFlag.objects.create( - team=self.team, - rollout_percentage=50, - name="Beta feature", - key=ff_key, - created_by=self.user, - ) - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2021-12-01T10:23", - "end_date": None, - "feature_flag_key": ff_key, - "parameters": None, - "filters": {"events": []}, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["detail"], "There is already a feature flag with this key.") - - def test_draft_experiment_doesnt_have_FF_active(self): - # Draft experiment - ff_key = "a-b-tests" - self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": None, - "end_date": None, - "feature_flag_key": ff_key, - "parameters": {}, - "filters": {"events": []}, - }, - ) - - created_ff = FeatureFlag.objects.get(key=ff_key) - self.assertEqual(created_ff.key, ff_key) - self.assertFalse(created_ff.active) - - def test_draft_experiment_doesnt_have_FF_active_even_after_updates(self): - # Draft experiment - ff_key = "a-b-tests" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": None, - "end_date": None, - "feature_flag_key": ff_key, - "parameters": {}, - "filters": {"events": []}, - }, - ) - - id = response.json()["id"] - - created_ff = FeatureFlag.objects.get(key=ff_key) - 
self.assertEqual(created_ff.key, ff_key) - self.assertFalse(created_ff.active) - - # Now update - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{id}", - { - "description": "Bazinga", - "filters": { - "events": [{"id": "$pageview"}], - }, - }, - ) - self.assertEqual(response.status_code, status.HTTP_200_OK) - - created_ff = FeatureFlag.objects.get(key=ff_key) - self.assertEqual(created_ff.key, ff_key) - self.assertFalse(created_ff.active) # didn't change to enabled while still draft - - # Now launch experiment - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{id}", - {"start_date": "2021-12-01T10:23"}, - ) - self.assertEqual(response.status_code, status.HTTP_200_OK) - - created_ff = FeatureFlag.objects.get(key=ff_key) - self.assertEqual(created_ff.key, ff_key) - self.assertTrue(created_ff.active) - - def test_launching_draft_experiment_activates_FF(self): - # Draft experiment - ff_key = "a-b-tests" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": None, - "end_date": None, - "feature_flag_key": ff_key, - "parameters": {}, - "filters": {"events": [{"id": "$pageview"}]}, - }, - ) - - id = response.json()["id"] - created_ff = FeatureFlag.objects.get(key=ff_key) - self.assertEqual(created_ff.key, ff_key) - self.assertFalse(created_ff.active) - - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{id}", - {"description": "Bazinga", "start_date": "2021-12-01T10:23"}, - ) - self.assertEqual(response.status_code, status.HTTP_200_OK) - updated_ff = FeatureFlag.objects.get(key=ff_key) - self.assertTrue(updated_ff.active) - - def test_create_multivariate_experiment_can_update_variants_in_draft(self): - ff_key = "a-b-test" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "end_date": None, - "feature_flag_key": ff_key, - "parameters": { - "feature_flag_variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 33, - }, - { - "key": "test_1", - "name": "Test Variant", - "rollout_percentage": 33, - }, - { - "key": "test_2", - "name": "Test Variant", - "rollout_percentage": 34, - }, - ] - }, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment") - self.assertEqual(response.json()["feature_flag_key"], ff_key) - - created_ff = FeatureFlag.objects.get(key=ff_key) - - self.assertEqual(created_ff.key, ff_key) - self.assertEqual(created_ff.active, False) - self.assertEqual(created_ff.filters["multivariate"]["variants"][0]["key"], "control") - self.assertEqual(created_ff.filters["multivariate"]["variants"][1]["key"], "test_1") - self.assertEqual(created_ff.filters["multivariate"]["variants"][2]["key"], "test_2") - self.assertEqual(created_ff.filters["groups"][0]["properties"], []) - - id = response.json()["id"] - - experiment = Experiment.objects.get(id=response.json()["id"]) - self.assertTrue(experiment.is_draft) - # Now try updating FF - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{id}", - { - "description": "Bazinga", - "parameters": { - "feature_flag_variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 33, - }, - { - "key": "test_1", - "name": 
"Test Variant", - "rollout_percentage": 33, - }, - { - "key": "test_2", - "name": "Test Variant", - "rollout_percentage": 24, - }, - { - "key": "test_3", - "name": "Test Variant", - "rollout_percentage": 10, - }, - ] - }, - }, - ) - self.assertEqual(response.status_code, status.HTTP_200_OK) - created_ff = FeatureFlag.objects.get(key=ff_key) - - self.assertEqual(created_ff.key, ff_key) - self.assertEqual(created_ff.active, False) - self.assertEqual(created_ff.filters["multivariate"]["variants"][0]["key"], "control") - self.assertEqual(created_ff.filters["multivariate"]["variants"][3]["key"], "test_3") - - def test_create_multivariate_experiment(self): - ff_key = "a-b-test" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2021-12-01T10:23", - "end_date": None, - "feature_flag_key": ff_key, - "parameters": { - "feature_flag_variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 33, - }, - { - "key": "test_1", - "name": "Test Variant", - "rollout_percentage": 33, - }, - { - "key": "test_2", - "name": "Test Variant", - "rollout_percentage": 34, - }, - ] - }, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment") - self.assertEqual(response.json()["feature_flag_key"], ff_key) - - created_ff = FeatureFlag.objects.get(key=ff_key) - - self.assertEqual(created_ff.key, ff_key) - self.assertEqual(created_ff.active, True) - self.assertEqual(created_ff.filters["multivariate"]["variants"][0]["key"], "control") - self.assertEqual(created_ff.filters["multivariate"]["variants"][1]["key"], "test_1") - self.assertEqual(created_ff.filters["multivariate"]["variants"][2]["key"], "test_2") - self.assertEqual(created_ff.filters["groups"][0]["properties"], []) - - id = response.json()["id"] - - experiment = Experiment.objects.get(id=response.json()["id"]) - self.assertFalse(experiment.is_draft) - # Now try updating FF - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{id}", - { - "description": "Bazinga", - "parameters": {"feature_flag_variants": [{"key": "control", "name": "X", "rollout_percentage": 33}]}, - }, - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual( - response.json()["detail"], - "Can't update feature_flag_variants on Experiment", - ) - - # Allow changing FF rollout %s - created_ff = FeatureFlag.objects.get(key=ff_key) - created_ff.filters = { - **created_ff.filters, - "multivariate": { - "variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 35, - }, - {"key": "test_1", "name": "Test Variant", "rollout_percentage": 33}, - {"key": "test_2", "name": "Test Variant", "rollout_percentage": 32}, - ] - }, - } - created_ff.save() - - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{id}", - { - "description": "Bazinga 222", - "parameters": { - "feature_flag_variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 33, - }, - { - "key": "test_1", - "name": "Test Variant", - "rollout_percentage": 33, - }, - { - "key": "test_2", - "name": "Test Variant", - "rollout_percentage": 34, - }, - ] - }, - }, - ) - self.assertEqual(response.status_code, status.HTTP_200_OK) - 
self.assertEqual(response.json()["parameters"]["feature_flag_variants"][0]["key"], "control") - self.assertEqual(response.json()["description"], "Bazinga 222") - created_ff = FeatureFlag.objects.get(key=ff_key) - - self.assertEqual(created_ff.key, ff_key) - self.assertEqual(created_ff.active, True) - self.assertEqual(created_ff.filters["multivariate"]["variants"][0]["key"], "control") - self.assertEqual(created_ff.filters["multivariate"]["variants"][0]["rollout_percentage"], 35) - self.assertEqual(created_ff.filters["multivariate"]["variants"][1]["key"], "test_1") - self.assertEqual(created_ff.filters["multivariate"]["variants"][1]["rollout_percentage"], 33) - self.assertEqual(created_ff.filters["multivariate"]["variants"][2]["key"], "test_2") - self.assertEqual(created_ff.filters["multivariate"]["variants"][2]["rollout_percentage"], 32) - - # Now try changing FF keys - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{id}", - { - "description": "Bazinga", - "parameters": { - "feature_flag_variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 33, - }, - { - "key": "test", - "name": "Test Variant", - "rollout_percentage": 33, - }, - { - "key": "test2", - "name": "Test Variant", - "rollout_percentage": 34, - }, - ] - }, - }, - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual( - response.json()["detail"], - "Can't update feature_flag_variants on Experiment", - ) - - # Now try updating other parameter keys - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{id}", - {"description": "Bazinga", "parameters": {"recommended_sample_size": 1500}}, - ) - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.json()["parameters"]["recommended_sample_size"], 1500) - - def test_creating_invalid_multivariate_experiment_no_control(self): - ff_key = "a-b-test" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2021-12-01T10:23", - "end_date": None, - "feature_flag_key": ff_key, - "parameters": { - "feature_flag_variants": [ - # no control - { - "key": "test_0", - "name": "Control Group", - "rollout_percentage": 33, - }, - { - "key": "test_1", - "name": "Test Variant", - "rollout_percentage": 33, - }, - { - "key": "test_2", - "name": "Test Variant", - "rollout_percentage": 33, - }, - ] - }, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual( - response.json()["detail"], - "Feature flag variants must contain a control variant", - ) - - def test_deleting_experiment_soft_deletes_feature_flag(self): - ff_key = "a-b-tests" - data = { - "name": "Test Experiment", - "description": "", - "start_date": "2021-12-01T10:23", - "end_date": None, - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - } - response = self.client.post(f"/api/projects/{self.team.id}/experiments/", data) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment") - self.assertEqual(response.json()["feature_flag_key"], ff_key) - - created_ff = FeatureFlag.objects.get(key=ff_key) - - id = response.json()["id"] - - # Now 
delete the experiment - response = self.client.delete(f"/api/projects/{self.team.id}/experiments/{id}") - - self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) - - with self.assertRaises(Experiment.DoesNotExist): - Experiment.objects.get(pk=id) - - # soft deleted - self.assertEqual(FeatureFlag.objects.get(pk=created_ff.id).deleted, True) - - # can recreate new experiment with same FF key - response = self.client.post(f"/api/projects/{self.team.id}/experiments/", data) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - - def test_soft_deleting_feature_flag_does_not_delete_experiment(self): - ff_key = "a-b-tests" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2021-12-01T10:23", - "end_date": None, - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment") - self.assertEqual(response.json()["feature_flag_key"], ff_key) - - created_ff = FeatureFlag.objects.get(key=ff_key) - - id = response.json()["id"] - - # Now delete the feature flag - response = self.client.patch( - f"/api/projects/{self.team.id}/feature_flags/{created_ff.pk}/", - {"deleted": True}, - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - - feature_flag_response = self.client.get(f"/api/projects/{self.team.id}/feature_flags/{created_ff.pk}/") - self.assertEqual(feature_flag_response.json().get("deleted"), True) - - self.assertIsNotNone(Experiment.objects.get(pk=id)) - - def test_creating_updating_experiment_with_group_aggregation(self): - ff_key = "a-b-tests" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": None, - "end_date": None, - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - "aggregation_group_type_index": 1, - }, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment") - self.assertEqual(response.json()["feature_flag_key"], ff_key) - - created_ff = FeatureFlag.objects.get(key=ff_key) - - self.assertEqual(created_ff.key, ff_key) - self.assertEqual(created_ff.filters["multivariate"]["variants"][0]["key"], "control") - self.assertEqual(created_ff.filters["multivariate"]["variants"][1]["key"], "test") - self.assertEqual(created_ff.filters["groups"][0]["properties"], []) - self.assertTrue(created_ff.filters["aggregation_group_type_index"] is None) - - id = response.json()["id"] - - # Now update group type index on filter - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{id}", - { - "description": "Bazinga", - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - "aggregation_group_type_index": 0, - }, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - - experiment = Experiment.objects.get(pk=id) - self.assertEqual(experiment.description, "Bazinga") - - created_ff = FeatureFlag.objects.get(key=ff_key) - self.assertEqual(created_ff.key, ff_key) - self.assertFalse(created_ff.active) - 
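# Editor's note (aside, not part of the deleted file): in this test the
# aggregation_group_type_index is supplied only inside `filters`, and the assertions before
# and after each PATCH show it never propagates to the feature flag: the flag's
# aggregation_group_type_index stays None throughout.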
self.assertEqual(created_ff.filters["multivariate"]["variants"][0]["key"], "control") - self.assertEqual(created_ff.filters["multivariate"]["variants"][1]["key"], "test") - self.assertEqual(created_ff.filters["groups"][0]["properties"], []) - self.assertTrue(created_ff.filters["aggregation_group_type_index"] is None) - - # Now remove group type index - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{id}", - { - "description": "Bazinga", - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - # "aggregation_group_type_index": None, # removed key - }, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - - experiment = Experiment.objects.get(pk=id) - self.assertEqual(experiment.description, "Bazinga") - - created_ff = FeatureFlag.objects.get(key=ff_key) - self.assertEqual(created_ff.key, ff_key) - self.assertFalse(created_ff.active) - self.assertEqual(created_ff.filters["multivariate"]["variants"][0]["key"], "control") - self.assertEqual(created_ff.filters["multivariate"]["variants"][1]["key"], "test") - self.assertEqual(created_ff.filters["groups"][0]["properties"], []) - self.assertTrue(created_ff.filters["aggregation_group_type_index"] is None) - - def test_creating_experiment_with_group_aggregation_parameter(self): - ff_key = "a-b-tests" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": None, - "end_date": None, - "feature_flag_key": ff_key, - "parameters": { - "aggregation_group_type_index": 0, - }, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment") - self.assertEqual(response.json()["feature_flag_key"], ff_key) - - created_ff = FeatureFlag.objects.get(key=ff_key) - - self.assertEqual(created_ff.key, ff_key) - self.assertEqual(created_ff.filters["multivariate"]["variants"][0]["key"], "control") - self.assertEqual(created_ff.filters["multivariate"]["variants"][1]["key"], "test") - self.assertEqual(created_ff.filters["groups"][0]["properties"], []) - self.assertEqual(created_ff.filters["aggregation_group_type_index"], 0) - - id = response.json()["id"] - - # Now update group type index on filter - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{id}", - { - "description": "Bazinga", - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - "aggregation_group_type_index": 1, - }, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - - experiment = Experiment.objects.get(pk=id) - self.assertEqual(experiment.description, "Bazinga") - - created_ff = FeatureFlag.objects.get(key=ff_key) - self.assertEqual(created_ff.key, ff_key) - self.assertFalse(created_ff.active) - self.assertEqual(created_ff.filters["multivariate"]["variants"][0]["key"], "control") - self.assertEqual(created_ff.filters["multivariate"]["variants"][1]["key"], "test") - self.assertEqual(created_ff.filters["groups"][0]["properties"], []) - self.assertEqual(created_ff.filters["aggregation_group_type_index"], 0) - - def test_used_in_experiment_is_populated_correctly_for_feature_flag_list(self) -> None: - ff_key = "a-b-test" - response = self.client.post( - 
f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2021-12-01T10:23", - "end_date": None, - "feature_flag_key": ff_key, - "parameters": { - "feature_flag_variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 33, - }, - { - "key": "test_1", - "name": "Test Variant", - "rollout_percentage": 33, - }, - { - "key": "test_2", - "name": "Test Variant", - "rollout_percentage": 34, - }, - ] - }, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment") - self.assertEqual(response.json()["feature_flag_key"], ff_key) - - created_experiment = response.json()["id"] - - # add another random feature flag - self.client.post( - f"/api/projects/{self.team.id}/feature_flags/", - data={ - "name": f"flag", - "key": f"flag_0", - "filters": {"groups": [{"rollout_percentage": 5}]}, - }, - format="json", - ).json() - - # TODO: Make sure permission bool doesn't cause n + 1 - with self.assertNumQueries(17): - response = self.client.get(f"/api/projects/{self.team.id}/feature_flags") - self.assertEqual(response.status_code, status.HTTP_200_OK) - result = response.json() - - self.assertEqual(result["count"], 2) - - self.assertCountEqual( - [(res["key"], res["experiment_set"]) for res in result["results"]], - [("flag_0", []), (ff_key, [created_experiment])], - ) - - def test_create_experiment_updates_feature_flag_cache(self): - cache.clear() - - initial_cached_flags = get_feature_flags_for_team_in_cache(self.team.pk) - self.assertIsNone(initial_cached_flags) - - ff_key = "a-b-test" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": None, - "end_date": None, - "feature_flag_key": ff_key, - "parameters": { - "feature_flag_variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 33, - }, - { - "key": "test_1", - "name": "Test Variant", - "rollout_percentage": 33, - }, - { - "key": "test_2", - "name": "Test Variant", - "rollout_percentage": 34, - }, - ] - }, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment") - self.assertEqual(response.json()["feature_flag_key"], ff_key) - - # save was called, but no flags saved because experiment is in draft mode, so flag is not active - cached_flags = get_feature_flags_for_team_in_cache(self.team.pk) - assert cached_flags is not None - self.assertEqual(0, len(cached_flags)) - - id = response.json()["id"] - - # launch experiment - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{id}", - { - "start_date": "2021-12-01T10:23", - }, - ) - - cached_flags = get_feature_flags_for_team_in_cache(self.team.pk) - assert cached_flags is not None - self.assertEqual(1, len(cached_flags)) - self.assertEqual(cached_flags[0].key, ff_key) - self.assertEqual( - cached_flags[0].filters, - { - "groups": [ - { - "properties": [], - "rollout_percentage": 100, - } - ], - "multivariate": { - "variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 33, - }, - { - "key": "test_1", - "name": "Test Variant", - 
"rollout_percentage": 33, - }, - { - "key": "test_2", - "name": "Test Variant", - "rollout_percentage": 34, - }, - ] - }, - "aggregation_group_type_index": None, - "holdout_groups": None, - }, - ) - - # Now try updating FF - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{id}", - { - "description": "Bazinga", - "parameters": {"feature_flag_variants": [{"key": "control", "name": "X", "rollout_percentage": 33}]}, - }, - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual( - response.json()["detail"], - "Can't update feature_flag_variants on Experiment", - ) - - # ensure cache doesn't change either - cached_flags = get_feature_flags_for_team_in_cache(self.team.pk) - assert cached_flags is not None - self.assertEqual(1, len(cached_flags)) - self.assertEqual(cached_flags[0].key, ff_key) - self.assertEqual( - cached_flags[0].filters, - { - "groups": [ - { - "properties": [], - "rollout_percentage": 100, - } - ], - "multivariate": { - "variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 33, - }, - { - "key": "test_1", - "name": "Test Variant", - "rollout_percentage": 33, - }, - { - "key": "test_2", - "name": "Test Variant", - "rollout_percentage": 34, - }, - ] - }, - "aggregation_group_type_index": None, - "holdout_groups": None, - }, - ) - - # Now try changing FF rollout %s - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{id}", - { - "description": "Bazinga", - "parameters": { - "feature_flag_variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 34, - }, - { - "key": "test_1", - "name": "Test Variant", - "rollout_percentage": 33, - }, - { - "key": "test_2", - "name": "Test Variant", - "rollout_percentage": 32, - }, - ] - }, - }, - ) - # changing variants isn't really supported by experiments anymore, need to do it directly - # on the FF - self.assertEqual(response.status_code, status.HTTP_200_OK) - - # ensure cache doesn't change either - cached_flags = get_feature_flags_for_team_in_cache(self.team.pk) - assert cached_flags is not None - self.assertEqual(1, len(cached_flags)) - self.assertEqual(cached_flags[0].key, ff_key) - self.assertEqual( - cached_flags[0].filters, - { - "groups": [ - { - "properties": [], - "rollout_percentage": 100, - } - ], - "multivariate": { - "variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 33, - }, - { - "key": "test_1", - "name": "Test Variant", - "rollout_percentage": 33, - }, - { - "key": "test_2", - "name": "Test Variant", - "rollout_percentage": 34, - }, - ] - }, - "aggregation_group_type_index": None, - "holdout_groups": None, - }, - ) - - def test_create_draft_experiment_with_filters(self) -> None: - ff_key = "a-b-tests" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": None, - "end_date": None, - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment") - self.assertEqual(response.json()["feature_flag_key"], ff_key) - - def test_create_launched_experiment_with_filters(self) -> None: - ff_key = "a-b-tests" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test 
Experiment", - "description": "", - "start_date": "2021-12-01T10:23", - "end_date": None, - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment") - self.assertEqual(response.json()["feature_flag_key"], ff_key) - - def test_create_draft_experiment_without_filters(self) -> None: - ff_key = "a-b-tests" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": None, - "end_date": None, - "feature_flag_key": ff_key, - "parameters": None, - "filters": {}, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment") - self.assertEqual(response.json()["feature_flag_key"], ff_key) - - def test_feature_flag_and_experiment_sync(self): - # Create an experiment with control and test variants - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "My test experiment", - "feature_flag_key": "experiment-test-flag", - "parameters": { - "feature_flag_variants": [ - {"key": "control", "name": "Control Group", "rollout_percentage": 50}, - {"key": "test", "name": "Test Variant", "rollout_percentage": 50}, - ] - }, - "filters": {"insight": "TRENDS", "events": [{"order": 0, "id": "$pageview"}]}, - }, - ) - - self.assertEqual(response.status_code, 201) - experiment_id = response.json()["id"] - feature_flag_id = response.json()["feature_flag"]["id"] - - # Fetch the FeatureFlag object - feature_flag = FeatureFlag.objects.get(id=feature_flag_id) - - variants = feature_flag.filters["multivariate"]["variants"] - - # Verify that the variants are correctly populated - self.assertEqual(len(variants), 2) - - self.assertEqual(variants[0]["key"], "control") - self.assertEqual(variants[0]["name"], "Control Group") - self.assertEqual(variants[0]["rollout_percentage"], 50) - - self.assertEqual(variants[1]["key"], "test") - self.assertEqual(variants[1]["name"], "Test Variant") - self.assertEqual(variants[1]["rollout_percentage"], 50) - - # Change the rollout percentages and groups of the feature flag - response = self.client.patch( - f"/api/projects/{self.team.id}/feature_flags/{feature_flag_id}", - { - "filters": { - "groups": [ - {"properties": [], "rollout_percentage": 99}, - {"properties": [], "rollout_percentage": 1}, - ], - "payloads": {}, - "multivariate": { - "variants": [ - {"key": "control", "rollout_percentage": 10}, - {"key": "test", "rollout_percentage": 90}, - ] - }, - "aggregation_group_type_index": 1, - } - }, - ) - - # Verify that Experiment.parameters.feature_flag_variants reflects the updated FeatureFlag.filters.multivariate.variants - experiment = Experiment.objects.get(id=experiment_id) - self.assertEqual( - experiment.parameters["feature_flag_variants"], - [{"key": "control", "rollout_percentage": 10}, {"key": "test", "rollout_percentage": 90}], - ) - self.assertEqual(experiment.parameters["aggregation_group_type_index"], 1) - - # Update the experiment with an unrelated change - response = self.client.patch( - f"/api/projects/{self.team.id}/experiments/{experiment_id}", - {"name": "Updated Test Experiment"}, - ) - - # Verify that the feature flag variants and groups remain unchanged - feature_flag = 
FeatureFlag.objects.get(id=feature_flag_id) - self.assertEqual( - feature_flag.filters["multivariate"]["variants"], - [{"key": "control", "rollout_percentage": 10}, {"key": "test", "rollout_percentage": 90}], - ) - self.assertEqual( - feature_flag.filters["groups"], - [{"properties": [], "rollout_percentage": 99}, {"properties": [], "rollout_percentage": 1}], - ) - - # Test removing aggregation_group_type_index - response = self.client.patch( - f"/api/projects/{self.team.id}/feature_flags/{feature_flag_id}", - { - "filters": { - "groups": [ - {"properties": [], "rollout_percentage": 99}, - {"properties": [], "rollout_percentage": 1}, - ], - "payloads": {}, - "multivariate": { - "variants": [ - {"key": "control", "rollout_percentage": 10}, - {"key": "test", "rollout_percentage": 90}, - ] - }, - } - }, - ) - - # Verify that aggregation_group_type_index is removed from experiment parameters - experiment = Experiment.objects.get(id=experiment_id) - self.assertNotIn("aggregation_group_type_index", experiment.parameters) - - -class TestExperimentAuxiliaryEndpoints(ClickhouseTestMixin, APILicensedTest): - def _generate_experiment(self, start_date="2024-01-01T10:23", extra_parameters=None): - ff_key = "a-b-test" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": start_date, - "end_date": None, - "feature_flag_key": ff_key, - "parameters": { - "feature_flag_variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 33, - }, - { - "key": "test_1", - "name": "Test Variant", - "rollout_percentage": 33, - }, - { - "key": "test_2", - "name": "Test Variant", - "rollout_percentage": 34, - }, - ], - **(extra_parameters or {}), - }, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment") - self.assertEqual(response.json()["feature_flag_key"], ff_key) - return response - - def test_create_exposure_cohort_for_experiment(self): - response = self._generate_experiment("2024-01-01T10:23") - - created_experiment = response.json()["id"] - - journeys_for( - { - "person1": [ - { - "event": "$feature_flag_called", - "timestamp": "2024-01-02", - "properties": {"$feature_flag": "a-b-test", "$feature_flag_response": "control"}, - }, - { - "event": "$feature_flag_called", - "timestamp": "2024-01-03", - "properties": {"$feature_flag": "a-b-test", "$feature_flag_response": "control"}, - }, - ], - "person2": [ - { - "event": "$feature_flag_called", - "timestamp": "2024-01-02", - "properties": {"$feature_flag": "a-b-test", "$feature_flag_response": "test_1"}, - }, - ], - "personX": [ - { - "event": "$feature_flag_called", - "timestamp": "2024-01-02", - "properties": {"$feature_flag": "a-b-test2", "$feature_flag_response": "test_1"}, - }, - ], - # out of time range - "person3": [ - { - "event": "$feature_flag_called", - "timestamp": "2023-01-02", - "properties": {"$feature_flag": "a-b-test", "$feature_flag_response": "control"}, - }, - ], - # wrong event - "person_out_of_control": [ - {"event": "$pageview", "timestamp": "2024-01-03"}, - {"event": "$pageleave", "timestamp": "2024-01-05"}, - ], - # doesn't have feature value set - "person_out_of_end_date": [ - { - "event": "$feature_flag_called", - "timestamp": "2024-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - }, - 
self.team, - ) - flush_persons_and_events() - - # now call to make cohort - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/{created_experiment}/create_exposure_cohort_for_experiment/", - {}, - ) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - cohort = response.json()["cohort"] - self.assertEqual(cohort["name"], 'Users exposed to experiment "Test Experiment"') - self.assertEqual(cohort["experiment_set"], [created_experiment]) - - cohort_id = cohort["id"] - - while cohort["is_calculating"]: - response = self.client.get(f"/api/projects/{self.team.id}/cohorts/{cohort_id}") - cohort = response.json() - - response = self.client.get(f"/api/projects/{self.team.id}/cohorts/{cohort_id}/persons/?cohort={cohort_id}") - self.assertEqual(response.status_code, 200, response.content) - self.assertEqual(["person1", "person2"], sorted([res["name"] for res in response.json()["results"]])) - - def test_create_exposure_cohort_for_experiment_with_custom_event_exposure(self): - self.maxDiff = None - - cohort_extra = Cohort.objects.create( - team=self.team, - filters={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "value": "http://example.com", - "type": "person", - }, - ], - } - }, - name="cohort_X", - ) - response = self._generate_experiment( - "2024-01-01T10:23", - { - "custom_exposure_filter": { - "events": [ - { - "id": "custom_exposure_event", - "order": 0, - "entity_type": "events", - "properties": [ - {"key": "bonk", "value": "bonk"}, - {"key": "id", "value": cohort_extra.id, "type": "cohort"}, - {"key": "properties.$current_url in ('x', 'y')", "type": "hogql"}, - {"key": "bonk-person", "value": "bonk", "type": "person"}, - ], - } - ], - "filter_test_accounts": False, - } - }, - ) - - created_experiment = response.json()["id"] - - journeys_for( - { - "person1": [ - { - "event": "custom_exposure_event", - "timestamp": "2024-01-02", - "properties": {"$current_url": "x", "bonk": "bonk"}, - }, - ], - "person2": [ - { - "event": "custom_exposure_event", - "timestamp": "2024-01-02", - "properties": {"$current_url": "y", "bonk": "bonk"}, - }, - ], - "person2-no-bonk": [ - { - "event": "custom_exposure_event", - "timestamp": "2024-01-02", - "properties": {"$current_url": "y"}, - }, - ], - "person2-not-in-prop": [ - { - "event": "custom_exposure_event", - "timestamp": "2024-01-02", - "properties": {"$current_url": "yxxxx"}, - }, - ], - "personX": [ - { - "event": "$feature_flag_called", - "timestamp": "2024-01-02", - "properties": {"$feature_flag": "a-b-test2", "$feature_flag_response": "test_1"}, - }, - ], - # out of time range - "person3": [ - { - "event": "custom_exposure_event", - "timestamp": "2023-01-02", - "properties": {"$current_url": "y"}, - }, - ], - # wrong event - "person_out_of_control": [ - {"event": "$pageview", "timestamp": "2024-01-03"}, - {"event": "$pageleave", "timestamp": "2024-01-05"}, - ], - }, - self.team, - ) - flush_persons_and_events() - - # now call to make cohort - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/{created_experiment}/create_exposure_cohort_for_experiment/", - {}, - ) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - cohort = response.json()["cohort"] - self.assertEqual(cohort["name"], 'Users exposed to experiment "Test Experiment"') - self.assertEqual(cohort["experiment_set"], [created_experiment]) - self.assertEqual( - cohort["filters"], - { - "properties": { - "type": "OR", - "values": [ - { - "type": "OR", - "values": [ - { - "event_filters": [ - 
{"key": "bonk", "type": "event", "value": "bonk"}, - {"key": "properties.$current_url in ('x', 'y')", "type": "hogql"}, - ], - "event_type": "events", - "explicit_datetime": "2024-01-01T10:23:00+00:00", - "key": "custom_exposure_event", - "negation": False, - "type": "behavioral", - "value": "performed_event", - } - ], - } - ], - } - }, - ) - - cohort_id = cohort["id"] - - while cohort["is_calculating"]: - response = self.client.get(f"/api/projects/{self.team.id}/cohorts/{cohort_id}") - cohort = response.json() - - response = self.client.get(f"/api/projects/{self.team.id}/cohorts/{cohort_id}/persons/?cohort={cohort_id}") - self.assertEqual(response.status_code, 200, response.content) - self.assertEqual(["person1", "person2"], sorted([res["name"] for res in response.json()["results"]])) - - @snapshot_clickhouse_insert_cohortpeople_queries - def test_create_exposure_cohort_for_experiment_with_custom_action_filters_exposure(self): - cohort_extra = Cohort.objects.create( - team=self.team, - filters={ - "properties": { - "type": "AND", - "values": [ - { - "key": "$pageview", - "value": "http://example.com", - "type": "person", - }, - ], - } - }, - name="cohort_X", - ) - cohort_extra.calculate_people_ch(pending_version=1) - - action1 = Action.objects.create( - team=self.team, - name="action1", - steps_json=[ - { - "event": "insight viewed", - "properties": [ - { - "key": "insight", - "type": "event", - "value": ["RETENTION"], - "operator": "exact", - }, - { - "key": "id", - "value": cohort_extra.id, - "type": "cohort", - }, - ], - }, - { - "event": "insight viewed", - "properties": [ - { - "key": "filters_count", - "type": "event", - "value": "1", - "operator": "gt", - } - ], - }, - { - "event": "$autocapture", - "url": "/123", - "url_matching": "regex", - }, - ], - ) - response = self._generate_experiment( - datetime.now() - timedelta(days=5), - { - "custom_exposure_filter": { - "actions": [ - { - "id": str(action1.id), # should support string ids - "order": 0, - "entity_type": "actions", - "properties": [ - {"key": "bonk", "value": "bonk"}, - {"key": "id", "value": cohort_extra.id, "type": "cohort"}, - {"key": "properties.$current_url in ('x', 'y')", "type": "hogql"}, - {"key": "bonk-person", "value": "bonk", "type": "person"}, - ], - } - ], - "filter_test_accounts": False, - } - }, - ) - - created_experiment = response.json()["id"] - - journeys_for( - { - "person1": [ - { - "event": "insight viewed", - "timestamp": datetime.now() - timedelta(days=2), - "properties": {"$current_url": "x", "bonk": "bonk", "filters_count": 2}, - }, - ], - "person2": [ - { - "event": "insight viewed", - "timestamp": datetime.now() - timedelta(days=2), - "properties": { - "$current_url": "y", - "bonk": "bonk", - "insight": "RETENTION", - }, # missing pageview person property - }, - ], - "person2-no-bonk": [ - { - "event": "insight viewed", - "timestamp": datetime.now() - timedelta(days=2), - "properties": {"$current_url": "y", "filters_count": 3}, - }, - ], - "person2-not-in-prop": [ - { - "event": "$autocapture", - "timestamp": datetime.now() - timedelta(days=2), - "properties": { - "$current_url": "https://posthog.com/feedback/1234" - }, # can't match because clashing current_url filters - }, - ], - }, - self.team, - ) - _create_person( - distinct_ids=["1"], - team_id=self.team.pk, - properties={"$pageview": "http://example.com"}, - ) - _create_event( - event="insight viewed", - team=self.team, - distinct_id="1", - properties={"insight": "RETENTION", "$current_url": "x", "bonk": "bonk"}, - 
timestamp=datetime.now() - timedelta(days=2), - ) - _create_person( - distinct_ids=["2"], - team_id=self.team.pk, - properties={"$pageview": "http://example.com"}, - ) - _create_event( - event="insight viewed", - team=self.team, - distinct_id="2", - properties={"insight": "RETENTION", "$current_url": "x"}, - timestamp=datetime.now() - timedelta(days=2), - ) - flush_persons_and_events() - - # now call to make cohort - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/{created_experiment}/create_exposure_cohort_for_experiment/", - {}, - ) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - cohort = response.json()["cohort"] - self.assertEqual(cohort["name"], 'Users exposed to experiment "Test Experiment"') - self.assertEqual(cohort["experiment_set"], [created_experiment]) - - self.maxDiff = None - target_filter = cohort["filters"]["properties"]["values"][0]["values"][0] - self.assertEqual( - target_filter["event_filters"], - [ - {"key": "bonk", "type": "event", "value": "bonk"}, - {"key": "properties.$current_url in ('x', 'y')", "type": "hogql"}, - ], - cohort["filters"], - ) - self.assertEqual( - target_filter["event_type"], - "actions", - ) - self.assertEqual( - target_filter["key"], - action1.id, - ) - self.assertEqual( - target_filter["type"], - "behavioral", - ) - self.assertEqual( - target_filter["value"], - "performed_event", - ) - explicit_datetime = parser.isoparse(target_filter["explicit_datetime"]) - - self.assertTrue( - explicit_datetime <= datetime.now(UTC) - timedelta(days=5) - and explicit_datetime >= datetime.now(UTC) - timedelta(days=5, hours=1) - ) - - cohort_id = cohort["id"] - - while cohort["is_calculating"]: - response = self.client.get(f"/api/projects/{self.team.id}/cohorts/{cohort_id}") - cohort = response.json() - - response = self.client.get(f"/api/projects/{self.team.id}/cohorts/{cohort_id}/persons/?cohort={cohort_id}") - self.assertEqual(response.status_code, 200, response.content) - self.assertEqual(["1", "person1"], sorted([res["name"] for res in response.json()["results"]])) - - def test_create_exposure_cohort_for_experiment_with_invalid_action_filters_exposure(self): - response = self._generate_experiment( - "2024-01-01T10:23", - { - "custom_exposure_filter": { - "actions": [ - { - "id": "oogabooga", - "order": 0, - "entity_type": "actions", - "properties": [ - {"key": "bonk", "value": "bonk"}, - {"key": "properties.$current_url in ('x', 'y')", "type": "hogql"}, - {"key": "bonk-person", "value": "bonk", "type": "person"}, - ], - } - ], - "filter_test_accounts": False, - } - }, - ) - - created_experiment = response.json()["id"] - - # now call to make cohort - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/{created_experiment}/create_exposure_cohort_for_experiment/", - {}, - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["detail"], "Invalid action ID") - - def test_create_exposure_cohort_for_experiment_with_draft_experiment(self): - response = self._generate_experiment(None) - - created_experiment = response.json()["id"] - - # now call to make cohort - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/{created_experiment}/create_exposure_cohort_for_experiment/", - {}, - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["detail"], "Experiment does not have a start date") - - def test_create_exposure_cohort_for_experiment_with_existing_cohort(self): - 
response = self._generate_experiment() - - created_experiment = response.json()["id"] - - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/{created_experiment}/create_exposure_cohort_for_experiment/", - {}, - ) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - - # now call to make cohort again - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/{created_experiment}/create_exposure_cohort_for_experiment/", - {}, - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["detail"], "Experiment already has an exposure cohort") - - -@flaky(max_runs=10, min_passes=1) -class ClickhouseTestFunnelExperimentResults(ClickhouseTestMixin, APILicensedTest): - @snapshot_clickhouse_queries - def test_experiment_flow_with_event_results(self): - journeys_for( - { - "person1": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test"}, - }, - ], - "person2": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - "person3": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - # doesn't have feature set - "person_out_of_control": [ - {"event": "$pageview", "timestamp": "2020-01-03"}, - {"event": "$pageleave", "timestamp": "2020-01-05"}, - ], - "person_out_of_end_date": [ - { - "event": "$pageview", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-08-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - # non-converters with FF - "person4": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "test"}, - } - ], - "person5": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test"}, - } - ], - }, - self.team, - ) - - ff_key = "a-b-test" - # generates the FF which should result in the above events^ - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2020-01-01T00:00", - "end_date": "2020-01-06T00:00", - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "insight": "funnels", - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - }, - ) - - id = response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/results") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - result = sorted(response_data["insight"], key=lambda x: x[0]["breakdown_value"][0]) - - self.assertEqual(result[0][0]["name"], "$pageview") - self.assertEqual(result[0][0]["count"], 2) - self.assertEqual("control", result[0][0]["breakdown_value"][0]) - - self.assertEqual(result[0][1]["name"], "$pageleave") - self.assertEqual(result[0][1]["count"], 2) - self.assertEqual("control", result[0][1]["breakdown_value"][0]) - - self.assertEqual(result[1][0]["name"], "$pageview") - 
self.assertEqual(result[1][0]["count"], 3) - self.assertEqual("test", result[1][0]["breakdown_value"][0]) - - self.assertEqual(result[1][1]["name"], "$pageleave") - self.assertEqual(result[1][1]["count"], 1) - self.assertEqual("test", result[1][1]["breakdown_value"][0]) - - # Variant with test: Beta(2, 3) and control: Beta(3, 1) distribution - # The variant has very low probability of being better. - self.assertAlmostEqual(response_data["probability"]["test"], 0.114, places=2) - self.assertEqual( - response_data["significance_code"], - ExperimentSignificanceCode.NOT_ENOUGH_EXPOSURE, - ) - self.assertAlmostEqual(response_data["expected_loss"], 1, places=2) - - @snapshot_clickhouse_queries - def test_experiment_flow_with_event_results_with_hogql_aggregation(self): - journeys_for( - { - "person1": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": { - "$feature/a-b-test": "test", - "$account_id": "person1", - }, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-04", - "properties": { - "$feature/a-b-test": "test", - "$account_id": "person1", - }, - }, - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": { - "$feature/a-b-test": "control", - "$account_id": "person2", - }, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-05", - "properties": { - "$feature/a-b-test": "control", - "$account_id": "person2", - }, - }, - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": { - "$feature/a-b-test": "control", - "$account_id": "person3", - }, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-05", - "properties": { - "$feature/a-b-test": "control", - "$account_id": "person3", - }, - }, - # doesn't have feature set - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$account_id": "person_out_of_control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-05", - "properties": {"$account_id": "person_out_of_control"}, - }, - # non converter - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": { - "$feature/a-b-test": "test", - "$account_id": "person4", - }, - }, - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": { - "$feature/a-b-test": "test", - "$account_id": "person5", - }, - }, - # doesn't have any properties - {"event": "$pageview", "timestamp": "2020-01-03"}, - {"event": "$pageleave", "timestamp": "2020-01-05"}, - ], - "person_out_of_end_date": [ - { - "event": "$pageview", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-08-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - }, - self.team, - ) - - ff_key = "a-b-test" - # generates the FF which should result in the above events^ - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2020-01-01T00:00", - "end_date": "2020-01-06T00:00", - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "insight": "funnels", - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - "funnel_aggregate_by_hogql": "properties.$account_id", - }, - }, - ) - - id = response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/results") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - result = sorted(response_data["insight"], key=lambda x: x[0]["breakdown_value"][0]) - - 
self.assertEqual(result[0][0]["name"], "$pageview") - self.assertEqual(result[0][0]["count"], 2) - self.assertEqual("control", result[0][0]["breakdown_value"][0]) - - self.assertEqual(result[0][1]["name"], "$pageleave") - self.assertEqual(result[0][1]["count"], 2) - self.assertEqual("control", result[0][1]["breakdown_value"][0]) - - self.assertEqual(result[1][0]["name"], "$pageview") - self.assertEqual(result[1][0]["count"], 3) - self.assertEqual("test", result[1][0]["breakdown_value"][0]) - - self.assertEqual(result[1][1]["name"], "$pageleave") - self.assertEqual(result[1][1]["count"], 1) - self.assertEqual("test", result[1][1]["breakdown_value"][0]) - - # Variant with test: Beta(2, 3) and control: Beta(3, 1) distribution - # The variant has very low probability of being better. - self.assertAlmostEqual(response_data["probability"]["test"], 0.114, places=2) - self.assertEqual( - response_data["significance_code"], - ExperimentSignificanceCode.NOT_ENOUGH_EXPOSURE, - ) - self.assertAlmostEqual(response_data["expected_loss"], 1, places=2) - - def test_experiment_with_test_account_filters(self): - self.team.test_account_filters = [ - { - "key": "exclude", - "type": "event", - "value": "yes", - "operator": "is_not_set", - } - ] - self.team.save() - - journeys_for( - { - "person1": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test", "exclude": "yes"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test", "exclude": "yes"}, - }, - ], - "person2": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - "person3": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - "person3_exclude": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control", "exclude": "yes"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control", "exclude": "yes"}, - }, - ], - # doesn't have feature set - "person_out_of_control": [ - {"event": "$pageview", "timestamp": "2020-01-03"}, - {"event": "$pageleave", "timestamp": "2020-01-05"}, - ], - "person_out_of_end_date": [ - { - "event": "$pageview", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-08-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - # non-converters with FF - "person4": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "test"}, - } - ], - "person5": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test"}, - } - ], - }, - self.team, - ) - - ff_key = "a-b-test" - # generates the FF which should result in the above events^ - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2020-01-01T00:00", - 
"end_date": "2020-01-06T00:00", - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "filter_test_accounts": True, - "insight": "funnels", - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - }, - ) - - id = response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/results") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - result = sorted(response_data["insight"], key=lambda x: x[0]["breakdown_value"][0]) - - self.assertEqual(result[0][0]["name"], "$pageview") - self.assertEqual(result[0][0]["count"], 2) - self.assertEqual("control", result[0][0]["breakdown_value"][0]) - - self.assertEqual(result[0][1]["name"], "$pageleave") - self.assertEqual(result[0][1]["count"], 2) - self.assertEqual("control", result[0][1]["breakdown_value"][0]) - - self.assertEqual(result[1][0]["name"], "$pageview") - self.assertEqual(result[1][0]["count"], 3) - self.assertEqual("test", result[1][0]["breakdown_value"][0]) - - self.assertEqual(result[1][1]["name"], "$pageleave") - self.assertEqual(result[1][1]["count"], 1) - self.assertEqual("test", result[1][1]["breakdown_value"][0]) - - # Variant with test: Beta(2, 3) and control: Beta(3, 1) distribution - # The variant has very low probability of being better. - self.assertAlmostEqual(response_data["probability"]["test"], 0.114, places=2) - self.assertEqual( - response_data["significance_code"], - ExperimentSignificanceCode.NOT_ENOUGH_EXPOSURE, - ) - self.assertAlmostEqual(response_data["expected_loss"], 1, places=2) - - def test_experiment_flow_with_event_results_cached(self): - journeys_for( - { - "person1": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test"}, - }, - ], - "person2": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - "person3": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - # doesn't have feature set - "person_out_of_control": [ - {"event": "$pageview", "timestamp": "2020-01-03"}, - {"event": "$pageleave", "timestamp": "2020-01-05"}, - ], - "person_out_of_end_date": [ - { - "event": "$pageview", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-08-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - # non-converters with FF - "person4": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "test"}, - } - ], - "person5": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test"}, - } - ], - }, - self.team, - ) - - ff_key = "a-b-test" - # generates the FF which should result in the above events^ - - experiment_payload = { - "name": "Test Experiment", - "description": "", - "start_date": "2020-01-01T00:00", - "end_date": "2020-01-06T00:00", - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "insight": "funnels", - "events": [ - {"order": 0, "id": "$pageview"}, - 
{"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - } - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - experiment_payload, - ) - - id = response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/results") - self.assertEqual(200, response.status_code) - - response_json = response.json() - response_data = response_json["result"] - result = sorted(response_data["insight"], key=lambda x: x[0]["breakdown_value"][0]) - - self.assertEqual(response_json.pop("is_cached"), False) - - self.assertEqual(result[0][0]["name"], "$pageview") - self.assertEqual(result[0][0]["count"], 2) - self.assertEqual("control", result[0][0]["breakdown_value"][0]) - - self.assertEqual(result[0][1]["name"], "$pageleave") - self.assertEqual(result[0][1]["count"], 2) - self.assertEqual("control", result[0][1]["breakdown_value"][0]) - - self.assertEqual(result[1][0]["name"], "$pageview") - self.assertEqual(result[1][0]["count"], 3) - self.assertEqual("test", result[1][0]["breakdown_value"][0]) - - self.assertEqual(result[1][1]["name"], "$pageleave") - self.assertEqual(result[1][1]["count"], 1) - self.assertEqual("test", result[1][1]["breakdown_value"][0]) - - # Variant with test: Beta(2, 3) and control: Beta(3, 1) distribution - # The variant has very low probability of being better. - self.assertAlmostEqual(response_data["probability"]["test"], 0.114, places=2) - self.assertEqual( - response_data["significance_code"], - ExperimentSignificanceCode.NOT_ENOUGH_EXPOSURE, - ) - self.assertAlmostEqual(response_data["expected_loss"], 1, places=2) - - response2 = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/results") - - response2_json = response2.json() - - self.assertEqual(response2_json.pop("is_cached"), True) - self.assertEqual(response2_json["result"], response_data) - - @snapshot_clickhouse_queries - def test_experiment_flow_with_event_results_and_events_out_of_time_range_timezones(self): - journeys_for( - { - "person1": [ - { - "event": "$pageview", - "timestamp": "2020-01-01T13:40:00", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-04T13:00:00", - "properties": {"$feature/a-b-test": "test"}, - }, - ], - "person2": [ - { - "event": "$pageview", - "timestamp": "2020-01-03T13:00:00", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-05 13:00:00", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - "person3": [ - { - "event": "$pageview", - "timestamp": "2020-01-04T13:00:00", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-05T13:00:00", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - # non-converters with FF - "person4": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "test"}, - } - ], - "person5": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test"}, - } - ], - # converted on the same day as end date, but offset by a few minutes. - # experiment ended at 10 AM, UTC+1, so this person should not be included. 
- "person6": [ - { - "event": "$pageview", - "timestamp": "2020-01-06T09:10:00", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-06T09:25:00", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - }, - self.team, - ) - - self.team.timezone = "Europe/Amsterdam" # GMT+1 - self.team.save() - - ff_key = "a-b-test" - # generates the FF which should result in the above events^ - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "insight": "funnels", - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - }, - ) - - id = response.json()["id"] - - self.client.patch( - f"/api/projects/{self.team.id}/experiments/{id}/", - { - "start_date": "2020-01-01T13:20:21.710000Z", # date is after first event, BUT timezone is GMT+1, so should be included - "end_date": "2020-01-06 09:00", - }, - ) - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/results") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - result = sorted(response_data["insight"], key=lambda x: x[0]["breakdown_value"][0]) - - self.assertEqual(result[0][0]["name"], "$pageview") - self.assertEqual(result[0][0]["count"], 2) - self.assertEqual("control", result[0][0]["breakdown_value"][0]) - - self.assertEqual(result[0][1]["name"], "$pageleave") - self.assertEqual(result[0][1]["count"], 2) - self.assertEqual("control", result[0][1]["breakdown_value"][0]) - - self.assertEqual(result[1][0]["name"], "$pageview") - self.assertEqual(result[1][0]["count"], 3) - self.assertEqual("test", result[1][0]["breakdown_value"][0]) - - self.assertEqual(result[1][1]["name"], "$pageleave") - self.assertEqual(result[1][1]["count"], 1) - self.assertEqual("test", result[1][1]["breakdown_value"][0]) - - # Variant with test: Beta(2, 3) and control: Beta(3, 1) distribution - # The variant has very low probability of being better. 
- self.assertAlmostEqual(response_data["probability"]["test"], 0.114, places=2) - self.assertEqual( - response_data["significance_code"], - ExperimentSignificanceCode.NOT_ENOUGH_EXPOSURE, - ) - self.assertAlmostEqual(response_data["expected_loss"], 1, places=2) - - @snapshot_clickhouse_queries - def test_experiment_flow_with_event_results_for_three_test_variants(self): - journeys_for( - { - "person1_2": [ - # one event having the property is sufficient, since first touch breakdown is the default - {"event": "$pageview", "timestamp": "2020-01-02", "properties": {}}, - { - "event": "$pageleave", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test_2"}, - }, - ], - "person1_1": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-04", - "properties": {}, - }, - ], - "person2_1": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test_1"}, - }, - ], - "person1": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-04", - "properties": {}, - }, - ], - "person2": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - "person3": [ - {"event": "$pageview", "timestamp": "2020-01-04", "properties": {}}, - { - "event": "$pageleave", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - # doesn't have feature set - "person_out_of_control": [ - {"event": "$pageview", "timestamp": "2020-01-03"}, - {"event": "$pageleave", "timestamp": "2020-01-05"}, - ], - "person_out_of_end_date": [ - { - "event": "$pageview", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-08-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - # non-converters with FF - "person4": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "test"}, - } - ], - "person5": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "test"}, - } - ], - "person6_1": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - } - ], - # converters with unknown flag variant set - "person_unknown_1": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "unknown_1"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "unknown_1"}, - }, - ], - "person_unknown_2": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "unknown_2"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "unknown_2"}, - }, - ], - "person_unknown_3": [ - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "unknown_3"}, - }, - { - "event": "$pageleave", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "unknown_3"}, - }, - ], - }, - self.team, - ) - - ff_key = "a-b-test" - # generates the FF which should 
result in the above events^ - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2020-01-01T00:00", - "end_date": "2020-01-06T00:00", - "feature_flag_key": ff_key, - "parameters": { - "feature_flag_variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 25, - }, - { - "key": "test_1", - "name": "Test Variant 1", - "rollout_percentage": 25, - }, - { - "key": "test_2", - "name": "Test Variant 2", - "rollout_percentage": 25, - }, - { - "key": "test", - "name": "Test Variant 3", - "rollout_percentage": 25, - }, - ] - }, - "filters": { - "insight": "funnels", - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - }, - ) - - id = response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/results") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - result = sorted(response_data["insight"], key=lambda x: x[0]["breakdown_value"][0]) - - self.assertEqual(result[0][0]["name"], "$pageview") - self.assertEqual(result[0][0]["count"], 2) - self.assertEqual("control", result[0][0]["breakdown_value"][0]) - - self.assertEqual(result[0][1]["name"], "$pageleave") - self.assertEqual(result[0][1]["count"], 2) - self.assertEqual("control", result[0][1]["breakdown_value"][0]) - - self.assertEqual(result[1][0]["name"], "$pageview") - self.assertEqual(result[1][0]["count"], 3) - self.assertEqual("test", result[1][0]["breakdown_value"][0]) - - self.assertEqual(result[1][1]["name"], "$pageleave") - self.assertEqual(result[1][1]["count"], 1) - self.assertEqual("test", result[1][1]["breakdown_value"][0]) - - self.assertAlmostEqual(response_data["probability"]["test"], 0.031, places=1) - self.assertAlmostEqual(response_data["probability"]["test_1"], 0.158, places=1) - self.assertAlmostEqual(response_data["probability"]["test_2"], 0.324, places=1) - self.assertAlmostEqual(response_data["probability"]["control"], 0.486, places=1) - self.assertEqual( - response_data["significance_code"], - ExperimentSignificanceCode.NOT_ENOUGH_EXPOSURE, - ) - self.assertAlmostEqual(response_data["expected_loss"], 1, places=2) - - -@flaky(max_runs=10, min_passes=1) -class ClickhouseTestTrendExperimentResults(ClickhouseTestMixin, APILicensedTest): - @snapshot_clickhouse_queries - def test_experiment_flow_with_event_results(self): - self.team.test_account_filters = [ - { - "key": "exclude", - "type": "event", - "value": "yes", - "operator": "is_not_set", - } - ] - self.team.save() - - journeys_for( - { - "person1": [ - # 5 counts, single person - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test", "exclude": "yes"}, - }, - # exposure measured via $feature_flag_called events - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", 
- "$feature_flag_response": "test", - }, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-03", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "test", - }, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "test", - "exclude": "yes", - }, - }, - ], - "person2": [ - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "control", - }, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "control", - "exclude": "yes", - }, - }, - # 1 exposure, but more absolute counts - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control", "exclude": "yes"}, - }, - ], - "person3": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "control", - }, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-03", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "control", - }, - }, - ], - # doesn't have feature set - "person_out_of_control": [ - {"event": "$pageview", "timestamp": "2020-01-03"}, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "random", - }, - }, - ], - "person_out_of_end_date": [ - { - "event": "$pageview", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-08-03", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "control", - }, - }, - ], - }, - self.team, - ) - - ff_key = "a-b-test" - # generates the FF which should result in the above events^ - creation_response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2020-01-01T00:00", - "end_date": "2020-01-06T00:00", - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "insight": "TRENDS", - "events": [{"order": 0, "id": "$pageview"}], - "filter_test_accounts": True, - }, - }, - ) - - id = creation_response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/results") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - result = sorted(response_data["insight"], key=lambda x: x["breakdown_value"]) - - self.assertEqual(result[0]["count"], 4) - self.assertEqual("control", result[0]["breakdown_value"]) - - self.assertEqual(result[1]["count"], 5) - self.assertEqual("test", result[1]["breakdown_value"]) - - # Variant with test: Gamma(5, 0.5) and control: Gamma(5, 1) distribution - # The variant has high probability of being better. 
(effectively Gamma(10,1)) - self.assertAlmostEqual(response_data["probability"]["test"], 0.923, places=2) - self.assertFalse(response_data["significant"]) - - def test_experiment_flow_with_event_results_with_custom_exposure(self): - self.team.test_account_filters = [ - { - "key": "exclude", - "type": "event", - "value": "yes", - "operator": "is_not_set", - } - ] - self.team.save() - - journeys_for( - { - "person1": [ - # 5 counts, single person - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test", "exclude": "yes"}, - }, - # exposure measured via $feature_flag_called events - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "test", - }, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-03", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "test", - }, - }, - { - "event": "custom_exposure_event", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "test", "bonk": "bonk"}, - }, - { - "event": "custom_exposure_event", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "test", "bonk": "bonk", "exclude": "yes"}, - }, - { - "event": "custom_exposure_event", - "timestamp": "2020-01-03", - "properties": { - "$feature/a-b-test": "control", - "bonk": "no-bonk", - }, - }, - ], - "person2": [ - { - "event": "custom_exposure_event", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control", "bonk": "bonk"}, - }, - { - "event": "custom_exposure_event", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control", "bonk": "bonk", "exclude": "yes"}, - }, - # 1 exposure, but more absolute counts - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - "person3": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "custom_exposure_event", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control", "bonk": "bonk"}, - }, - { - "event": "custom_exposure_event", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "test", "bonk": "no-bonk"}, - }, - ], - # doesn't have feature set - "person_out_of_control": [ - {"event": "$pageview", "timestamp": "2020-01-03"}, - { - "event": "custom_exposure_event", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "random", "bonk": "bonk"}, - }, - { - "event": "custom_exposure_event", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "test", "bonk": "no-bonk"}, - }, - ], - "person_out_of_end_date": [ - { - "event": "$pageview", - "timestamp": "2020-08-03", - "properties": 
{"$feature/a-b-test": "control"}, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-08-03", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "control", - }, - }, - { - "event": "custom_exposure_event", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "test", "bonk": "bonk"}, - }, - ], - }, - self.team, - ) - - ff_key = "a-b-test" - # generates the FF which should result in the above events^ - creation_response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2020-01-01T00:00", - "end_date": "2020-01-06T00:00", - "feature_flag_key": ff_key, - "parameters": { - "custom_exposure_filter": { - "events": [ - { - "id": "custom_exposure_event", - "order": 0, - "properties": [{"key": "bonk", "value": "bonk"}], - } - ], - "filter_test_accounts": True, - } - }, - "filters": { - "insight": "TRENDS", - "events": [{"order": 0, "id": "$pageview"}], - "filter_test_accounts": True, - }, - }, - ) - - id = creation_response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/results") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - result = sorted(response_data["insight"], key=lambda x: x["breakdown_value"]) - - self.assertEqual(result[0]["count"], 4) - self.assertEqual("control", result[0]["breakdown_value"]) - - self.assertEqual(result[1]["count"], 5) - self.assertEqual("test", result[1]["breakdown_value"]) - - # Variant with test: Gamma(5, 0.5) and control: Gamma(5, 1) distribution - # The variant has high probability of being better. (effectively Gamma(10,1)) - self.assertAlmostEqual(response_data["probability"]["test"], 0.923, places=2) - self.assertFalse(response_data["significant"]) - - @snapshot_clickhouse_queries - def test_experiment_flow_with_event_results_with_hogql_filter(self): - journeys_for( - { - "person1": [ - # 5 counts, single person - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test", "hogql": "true"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test", "hogql": "true"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test", "hogql": "true"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test", "hogql": "true"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test", "hogql": "true"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - # exposure measured via $feature_flag_called events - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "test", - }, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-03", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "test", - }, - }, - ], - "person2": [ - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "control", - }, - }, - # 1 exposure, but more absolute counts - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control", "hogql": "true"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": 
{"$feature/a-b-test": "control", "hogql": "true"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control", "hogql": "true"}, - }, - ], - "person3": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control", "hogql": "true"}, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "control", - }, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-03", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "control", - }, - }, - ], - # doesn't have feature set - "person_out_of_control": [ - {"event": "$pageview", "timestamp": "2020-01-03"}, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "random", - }, - }, - ], - "person_out_of_end_date": [ - { - "event": "$pageview", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-08-03", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "control", - }, - }, - ], - }, - self.team, - ) - - ff_key = "a-b-test" - # generates the FF which should result in the above events^ - creation_response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2020-01-01T00:00", - "end_date": "2020-01-06T00:00", - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "insight": "TRENDS", - "events": [ - { - "order": 0, - "id": "$pageview", - "properties": [ - { - "key": "properties.hogql ilike 'true'", - "type": "hogql", - "value": None, - } - ], - } - ], - }, - }, - ) - - id = creation_response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/results") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - result = sorted(response_data["insight"], key=lambda x: x["breakdown_value"]) - - self.assertEqual(result[0]["count"], 4) - self.assertEqual("control", result[0]["breakdown_value"]) - - self.assertEqual(result[1]["count"], 5) - self.assertEqual("test", result[1]["breakdown_value"]) - - # Variant with test: Gamma(5, 0.5) and control: Gamma(5, 1) distribution - # The variant has high probability of being better. 
(effectively Gamma(10,1)) - self.assertAlmostEqual(response_data["probability"]["test"], 0.923, places=2) - self.assertFalse(response_data["significant"]) - - @snapshot_clickhouse_queries - def test_experiment_flow_with_event_results_out_of_timerange_timezone(self): - journeys_for( - { - "person1": [ - # 5 counts, single person - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - # exposure measured via $feature_flag_called events - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "test", - }, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-03", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "test", - }, - }, - ], - "person2": [ - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "control", - }, - }, - # 1 exposure, but more absolute counts - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - "person3": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "control", - }, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-03", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "control", - }, - }, - ], - # doesn't have feature set - "person_out_of_control": [ - {"event": "$pageview", "timestamp": "2020-01-03"}, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "random", - }, - }, - ], - "person_out_of_end_date": [ - { - "event": "$pageview", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-08-03", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "control", - }, - }, - ], - # slightly out of time range - "person_t1": [ - { - "event": "$pageview", - "timestamp": "2020-01-01 09:00:00", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-01 08:00:00", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-01 07:00:00", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-01 06:00:00", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-01 06:00:00", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": 
"test", - }, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-01 08:00:00", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "test", - }, - }, - ], - "person_t2": [ - { - "event": "$pageview", - "timestamp": "2020-01-06 15:02:00", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-06 15:02:00", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "control", - }, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-06 16:00:00", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "control", - }, - }, - ], - }, - self.team, - ) - - self.team.timezone = "US/Pacific" # GMT -8 - self.team.save() - - ff_key = "a-b-test" - # generates the FF which should result in the above events^ - creation_response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2020-01-01T10:10", # 2 PM in GMT-8 is 10 PM in GMT - "end_date": "2020-01-06T15:00", - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "insight": "TRENDS", - "events": [{"order": 0, "id": "$pageview"}], - }, - }, - ) - - id = creation_response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/results") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - result = sorted(response_data["insight"], key=lambda x: x["breakdown_value"]) - - self.assertEqual(result[0]["count"], 4) - self.assertEqual("control", result[0]["breakdown_value"]) - - self.assertEqual(result[1]["count"], 5) - self.assertEqual("test", result[1]["breakdown_value"]) - - # Variant with test: Gamma(5, 0.5) and control: Gamma(5, 1) distribution - # The variant has high probability of being better. 
(effectively Gamma(10,1)) - self.assertAlmostEqual(response_data["probability"]["test"], 0.923, places=2) - self.assertFalse(response_data["significant"]) - - @snapshot_clickhouse_queries - def test_experiment_flow_with_event_results_for_three_test_variants(self): - journeys_for( - { - "person1_2": [ - { - "event": "$pageview1", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_2"}, - } - ], - "person1_1": [ - { - "event": "$pageview1", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - } - ], - "person2_1": [ - { - "event": "$pageview1", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - } - ], - # "person1": [ - # {"event": "$pageview1", "timestamp": "2020-01-02", "properties": {"$feature/a-b-test": "test"},}, - # ], - "person2": [ - { - "event": "$pageview1", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - } - ], - "person3": [ - { - "event": "$pageview1", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - } - ], - "person4": [ - { - "event": "$pageview1", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - } - ], - # doesn't have feature set - "person_out_of_control": [{"event": "$pageview1", "timestamp": "2020-01-03"}], - "person_out_of_end_date": [ - { - "event": "$pageview1", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - } - ], - }, - self.team, - ) - - ff_key = "a-b-test" - # generates the FF which should result in the above events^ - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2020-01-01T00:00", - "end_date": "2020-01-06T00:00", - "feature_flag_key": ff_key, - "parameters": { - "feature_flag_variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 25, - }, - { - "key": "test_1", - "name": "Test Variant 1", - "rollout_percentage": 25, - }, - { - "key": "test_2", - "name": "Test Variant 2", - "rollout_percentage": 25, - }, - { - "key": "test", - "name": "Test Variant 3", - "rollout_percentage": 25, - }, - ] - }, - "filters": { - "insight": "trends", - "events": [{"order": 0, "id": "$pageview1"}], - "properties": [], - }, - }, - ) - - id = response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/results") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - result = sorted(response_data["insight"], key=lambda x: x["breakdown_value"]) - - self.assertEqual(result[0]["count"], 3) - self.assertEqual("control", result[0]["breakdown_value"]) - - self.assertEqual(result[1]["count"], 2) - self.assertEqual("test_1", result[1]["breakdown_value"]) - - self.assertEqual(result[2]["count"], 1) - self.assertEqual("test_2", result[2]["breakdown_value"]) - - # test missing from results, since no events - self.assertAlmostEqual(response_data["probability"]["test_1"], 0.299, places=2) - self.assertAlmostEqual(response_data["probability"]["test_2"], 0.119, places=2) - self.assertAlmostEqual(response_data["probability"]["control"], 0.583, places=2) - - def test_experiment_flow_with_event_results_for_two_test_variants_with_varying_exposures(self): - journeys_for( - { - "person1_2": [ - # for count data - { - "event": "$pageview1", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_2"}, - }, - { - "event": "$pageview1", - "timestamp": "2020-01-02", - "properties": 
{"$feature/a-b-test": "test_2"}, - }, - # for exposure counting (counted as 1 only) - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "test_2", - }, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "test_2", - }, - }, - ], - "person1_1": [ - { - "event": "$pageview1", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "test_1", - }, - }, - ], - "person2_1": [ - { - "event": "$pageview1", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - }, - { - "event": "$pageview1", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test_1"}, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "test_1", - }, - }, - ], - "person2": [ - { - "event": "$pageview1", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageview1", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - # 0 exposure shouldn't ideally happen, but it's possible - ], - "person3": [ - { - "event": "$pageview1", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "control", - }, - }, - ], - "person4": [ - { - "event": "$pageview1", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-01-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "control", - }, - }, - ], - # doesn't have feature set - "person_out_of_control": [{"event": "$pageview1", "timestamp": "2020-01-03"}], - "person_out_of_end_date": [ - { - "event": "$pageview1", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$feature_flag_called", - "timestamp": "2020-08-02", - "properties": { - "$feature_flag": "a-b-test", - "$feature_flag_response": "control", - }, - }, - ], - }, - self.team, - ) - - ff_key = "a-b-test" - # generates the FF which should result in the above events^ - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2020-01-01T00:00", - "end_date": "2020-01-06T00:00", - "feature_flag_key": ff_key, - "parameters": { - "feature_flag_variants": [ - { - "key": "control", - "name": "Control Group", - "rollout_percentage": 33, - }, - { - "key": "test_1", - "name": "Test Variant 1", - "rollout_percentage": 33, - }, - { - "key": "test_2", - "name": "Test Variant 2", - "rollout_percentage": 34, - }, - ] - }, - "filters": { - "insight": "trends", - "events": [{"order": 0, "id": "$pageview1"}], - }, - }, - ) - - id = response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/results") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - result = sorted(response_data["insight"], key=lambda x: x["breakdown_value"]) - - self.assertEqual(result[0]["count"], 4) - 
self.assertEqual("control", result[0]["breakdown_value"]) - - self.assertEqual(result[1]["count"], 3) - self.assertEqual("test_1", result[1]["breakdown_value"]) - - self.assertEqual(result[2]["count"], 2) - self.assertEqual("test_2", result[2]["breakdown_value"]) - - # control: Gamma(4, 1) - # test1: Gamma(3, 1) - # test2: Gamma(2, 0.5) - self.assertAlmostEqual(response_data["probability"]["test_1"], 0.177, places=2) - self.assertAlmostEqual(response_data["probability"]["test_2"], 0.488, places=2) - self.assertAlmostEqual(response_data["probability"]["control"], 0.334, places=2) - - def test_experiment_flow_with_avg_count_per_user_event_results(self): - journeys_for( - { - "person1": [ - # 5 counts, single person - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test"}, - }, - ], - "person2": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - "person3": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - "person4": [ - { - "event": "$pageview", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "test"}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "test"}, - }, - ], - # doesn't have feature set - "person_out_of_control": [ - {"event": "$pageview", "timestamp": "2020-01-03"}, - ], - "person_out_of_end_date": [ - { - "event": "$pageview", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - }, - self.team, - ) - - ff_key = "a-b-test" - # generates the FF which should result in the above events^ - creation_response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2020-01-01T00:00", - "end_date": "2020-01-06T00:00", - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "insight": "TRENDS", - "events": [ - { - "order": 0, - "id": "$pageview", - "math": "avg_count_per_actor", - "name": "$pageview", - } - ], - "properties": [], - }, - }, - ) - - id = creation_response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/results") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - result = sorted(response_data["insight"], key=lambda x: x["breakdown_value"]) - - self.assertEqual(result[0]["data"], [0.0, 0.0, 1.0, 1.0, 1.0, 0.0]) - self.assertEqual("control", result[0]["breakdown_value"]) - - self.assertEqual(result[1]["data"], [0.0, 5.0, 0.0, 0.0, 2.0, 0.0]) - self.assertEqual("test", result[1]["breakdown_value"]) - - # Variant with test: Gamma(7, 1) and control: Gamma(4, 1) distribution - # The variant has high probability of being better. 
(effectively Gamma(10,1)) - self.assertAlmostEqual(response_data["probability"]["test"], 0.805, places=2) - self.assertFalse(response_data["significant"]) - - def test_experiment_flow_with_avg_count_per_property_value_results(self): - journeys_for( - { - "person1": [ - # 5 counts, single person - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test", "mathable": 1}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test", "mathable": 1}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test", "mathable": 3}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test", "mathable": 3}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test", "mathable": 100}, - }, - ], - "person2": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control", "mathable": 1}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control", "mathable": 1}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control", "mathable": 1}, - }, - ], - "person3": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control", "mathable": 2}, - }, - ], - "person4": [ - { - "event": "$pageview", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "test", "mathable": 1}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "test", "mathable": 1.5}, - }, - ], - # doesn't have feature set - "person_out_of_control": [ - {"event": "$pageview", "timestamp": "2020-01-03"}, - ], - "person_out_of_end_date": [ - { - "event": "$pageview", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - }, - self.team, - ) - - ff_key = "a-b-test" - # generates the FF which should result in the above events^ - creation_response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2020-01-01T00:00", - "end_date": "2020-01-06T00:00", - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "insight": "TRENDS", - "events": [ - { - "order": 0, - "id": "$pageview", - "math": "max", - "math_property": "mathable", - } - ], - "properties": [], - }, - }, - ) - - id = creation_response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/results") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - result = sorted(response_data["insight"], key=lambda x: x["breakdown_value"]) - - self.assertEqual(result[0]["data"], [0.0, 0.0, 1.0, 2.0, 1.0, 0.0]) - self.assertEqual("control", result[0]["breakdown_value"]) - - self.assertEqual(result[1]["data"], [0.0, 100.0, 0.0, 0.0, 1.5, 0.0]) - self.assertEqual("test", result[1]["breakdown_value"]) - - # Variant with test: Gamma(7, 1) and control: Gamma(4, 1) distribution - # The variant has high probability of being better. 
(effectively Gamma(10,1)) - self.assertAlmostEqual(response_data["probability"]["test"], 0.805, places=2) - self.assertFalse(response_data["significant"]) - - def test_experiment_flow_with_sum_count_per_property_value_results(self): - journeys_for( - { - "person1": [ - # 5 counts, single person - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test", "mathable": 1}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test", "mathable": 1}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test", "mathable": 3}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test", "mathable": 3}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-02", - "properties": {"$feature/a-b-test": "test", "mathable": 10}, - }, - ], - "person2": [ - { - "event": "$pageview", - "timestamp": "2020-01-03", - "properties": {"$feature/a-b-test": "control", "mathable": 1}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control", "mathable": 1}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "control", "mathable": 1}, - }, - ], - "person3": [ - { - "event": "$pageview", - "timestamp": "2020-01-04", - "properties": {"$feature/a-b-test": "control", "mathable": 2}, - }, - ], - "person4": [ - { - "event": "$pageview", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "test", "mathable": 1}, - }, - { - "event": "$pageview", - "timestamp": "2020-01-05", - "properties": {"$feature/a-b-test": "test", "mathable": 1.5}, - }, - ], - # doesn't have feature set - "person_out_of_control": [ - {"event": "$pageview", "timestamp": "2020-01-03"}, - ], - "person_out_of_end_date": [ - { - "event": "$pageview", - "timestamp": "2020-08-03", - "properties": {"$feature/a-b-test": "control"}, - }, - ], - }, - self.team, - ) - - ff_key = "a-b-test" - # generates the FF which should result in the above events^ - creation_response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2020-01-01T00:00", - "end_date": "2020-01-06T00:00", - "feature_flag_key": ff_key, - "parameters": { - "custom_exposure_filter": { - "events": [ - { - "id": "$pageview", # exposure is total pageviews - "order": 0, - } - ], - } - }, - "filters": { - "insight": "TRENDS", - "events": [ - { - "order": 0, - "id": "$pageview", - "math": "sum", - "math_property": "mathable", - } - ], - "properties": [], - }, - }, - ) - - id = creation_response.json()["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/experiments/{id}/results") - self.assertEqual(200, response.status_code) - - response_data = response.json()["result"] - result = sorted(response_data["insight"], key=lambda x: x["breakdown_value"]) - - self.assertEqual(result[0]["data"], [0.0, 0.0, 1.0, 4.0, 5.0, 5.0]) - self.assertEqual("control", result[0]["breakdown_value"]) - - self.assertEqual(result[1]["data"], [0.0, 18.0, 18.0, 18.0, 20.5, 20.5]) - self.assertEqual("test", result[1]["breakdown_value"]) - - # Variant with test: Gamma(7, 1) and control: Gamma(4, 1) distribution - # The variant has high probability of being better. 
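The expected series asserted above read as cumulative daily sums of the "mathable" property over the six-day window: test accrues 1 + 1 + 3 + 3 + 10 = 18 on 2020-01-02 and another 1 + 1.5 on 2020-01-05, while control accrues 1, then 1 + 2, then 1 across 2020-01-03 to 2020-01-05 (the endpoint reports the same values as floats). A small sketch of that accumulation (not from the deleted file above):

from itertools import accumulate

# Daily sums of "mathable" over 2020-01-01 .. 2020-01-06, read off the
# journeys above.
control_daily = [0, 0, 1, 1 + 2, 1, 0]                   # person2 + person3
test_daily = [0, 1 + 1 + 3 + 3 + 10, 0, 0, 1 + 1.5, 0]   # person1 + person4

print(list(accumulate(control_daily)))  # [0, 0, 1, 4, 5, 5]
print(list(accumulate(test_daily)))     # [0, 18, 18, 18, 20.5, 20.5]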
(effectively Gamma(10,1)) - self.assertAlmostEqual(response_data["probability"]["test"], 0.9513, places=2) - self.assertFalse(response_data["significant"]) diff --git a/ee/clickhouse/views/test/test_clickhouse_groups.py b/ee/clickhouse/views/test/test_clickhouse_groups.py deleted file mode 100644 index fba4063867..0000000000 --- a/ee/clickhouse/views/test/test_clickhouse_groups.py +++ /dev/null @@ -1,655 +0,0 @@ -from unittest import mock -from uuid import UUID - -from freezegun.api import freeze_time - -from posthog.models import GroupTypeMapping, Person -from posthog.models.group.util import create_group -from posthog.models.organization import Organization -from posthog.models.sharing_configuration import SharingConfiguration -from posthog.models.team.team import Team -from posthog.test.base import ( - APIBaseTest, - ClickhouseTestMixin, - _create_event, - snapshot_clickhouse_queries, -) - - -class ClickhouseTestGroupsApi(ClickhouseTestMixin, APIBaseTest): - maxDiff = None - - @freeze_time("2021-05-02") - def test_groups_list(self): - with freeze_time("2021-05-01"): - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:5", - properties={"industry": "finance", "name": "Mr. Krabs"}, - ) - with freeze_time("2021-05-02"): - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:6", - properties={"industry": "technology"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key="company:1", - properties={"name": "Plankton"}, - ) - - response_data = self.client.get(f"/api/projects/{self.team.id}/groups?group_type_index=0").json() - self.assertEqual( - response_data, - { - "next": None, - "previous": None, - "results": [ - { - "created_at": "2021-05-02T00:00:00Z", - "group_key": "org:6", - "group_properties": {"industry": "technology"}, - "group_type_index": 0, - }, - { - "created_at": "2021-05-01T00:00:00Z", - "group_key": "org:5", - "group_properties": { - "industry": "finance", - "name": "Mr. Krabs", - }, - "group_type_index": 0, - }, - ], - }, - ) - response_data = self.client.get(f"/api/projects/{self.team.id}/groups?group_type_index=0&search=Krabs").json() - self.assertEqual( - response_data, - { - "next": None, - "previous": None, - "results": [ - { - "created_at": "2021-05-01T00:00:00Z", - "group_key": "org:5", - "group_properties": { - "industry": "finance", - "name": "Mr. Krabs", - }, - "group_type_index": 0, - }, - ], - }, - ) - - response_data = self.client.get(f"/api/projects/{self.team.id}/groups?group_type_index=0&search=org:5").json() - self.assertEqual( - response_data, - { - "next": None, - "previous": None, - "results": [ - { - "created_at": "2021-05-01T00:00:00Z", - "group_key": "org:5", - "group_properties": { - "industry": "finance", - "name": "Mr. Krabs", - }, - "group_type_index": 0, - }, - ], - }, - ) - - @freeze_time("2021-05-02") - def test_groups_list_no_group_type(self): - response_data = self.client.get(f"/api/projects/{self.team.id}/groups/").json() - self.assertEqual( - response_data, - { - "type": "validation_error", - "attr": "group_type_index", - "code": "invalid_input", - "detail": mock.ANY, - }, - ) - - @freeze_time("2021-05-02") - def test_retrieve_group(self): - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="key", - properties={"industry": "finance", "name": "Mr. 
Krabs"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key="foo//bar", - properties={}, - ) - - fail_response = self.client.get(f"/api/projects/{self.team.id}/groups/find?group_type_index=1&group_key=key") - self.assertEqual(fail_response.status_code, 404) - - ok_response_data = self.client.get(f"/api/projects/{self.team.id}/groups/find?group_type_index=0&group_key=key") - self.assertEqual(ok_response_data.status_code, 200) - self.assertEqual( - ok_response_data.json(), - { - "created_at": "2021-05-02T00:00:00Z", - "group_key": "key", - "group_properties": {"industry": "finance", "name": "Mr. Krabs"}, - "group_type_index": 0, - }, - ) - ok_response_data = self.client.get( - f"/api/projects/{self.team.id}/groups/find?group_type_index=1&group_key=foo//bar" - ) - self.assertEqual(ok_response_data.status_code, 200) - self.assertEqual( - ok_response_data.json(), - { - "created_at": "2021-05-02T00:00:00Z", - "group_key": "foo//bar", - "group_properties": {}, - "group_type_index": 1, - }, - ) - - @freeze_time("2021-05-10") - @snapshot_clickhouse_queries - def test_related_groups(self): - self._create_related_groups_data() - - response_data = self.client.get( - f"/api/projects/{self.team.id}/groups/related?id=0::0&group_type_index=0" - ).json() - self.assertEqual( - response_data, - [ - { - "created_at": "2021-05-10T00:00:00Z", - "distinct_ids": ["1", "2"], - "id": "01795392-cc00-0003-7dc7-67a694604d72", - "uuid": "01795392-cc00-0003-7dc7-67a694604d72", - "is_identified": False, - "name": "1", - "properties": {}, - "type": "person", - "matched_recordings": [], - "value_at_data_point": None, - }, - { - "created_at": "2021-05-10T00:00:00Z", - "group_key": "1::2", - "group_type_index": 1, - "id": "1::2", - "properties": {}, - "type": "group", - "matched_recordings": [], - "value_at_data_point": None, - }, - { - "created_at": "2021-05-10T00:00:00Z", - "group_key": "1::3", - "group_type_index": 1, - "id": "1::3", - "properties": {}, - "type": "group", - "matched_recordings": [], - "value_at_data_point": None, - }, - ], - ) - - @freeze_time("2021-05-10") - @snapshot_clickhouse_queries - def test_related_groups_person(self): - uuid = self._create_related_groups_data() - - response_data = self.client.get(f"/api/projects/{self.team.id}/groups/related?id={uuid}").json() - self.assertEqual( - response_data, - [ - { - "created_at": "2021-05-10T00:00:00Z", - "group_key": "0::0", - "group_type_index": 0, - "id": "0::0", - "properties": {}, - "type": "group", - "matched_recordings": [], - "value_at_data_point": None, - }, - { - "created_at": "2021-05-10T00:00:00Z", - "group_key": "0::1", - "group_type_index": 0, - "id": "0::1", - "properties": {}, - "type": "group", - "matched_recordings": [], - "value_at_data_point": None, - }, - { - "created_at": "2021-05-10T00:00:00Z", - "group_key": "1::2", - "group_type_index": 1, - "id": "1::2", - "properties": {}, - "type": "group", - "matched_recordings": [], - "value_at_data_point": None, - }, - { - "created_at": "2021-05-10T00:00:00Z", - "group_key": "1::3", - "group_type_index": 1, - "id": "1::3", - "properties": {}, - "type": "group", - "matched_recordings": [], - "value_at_data_point": None, - }, - ], - ) - - def test_property_definitions(self): - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:5", - properties={"industry": "finance", "name": "Mr. 
Krabs"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:6", - properties={"industry": "technology"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key="company:1", - properties={"name": "Plankton"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key="company:2", - properties={}, - ) - - response_data = self.client.get(f"/api/projects/{self.team.id}/groups/property_definitions").json() - self.assertEqual( - response_data, - { - "0": [{"name": "industry", "count": 2}, {"name": "name", "count": 1}], - "1": [{"name": "name", "count": 1}], - }, - ) - - def test_property_values(self): - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:5", - properties={"industry": "finance"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:6", - properties={"industry": "technology"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:7", - properties={"industry": "finance-technology"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key="org:1", - properties={"industry": "finance"}, - ) - - # Test without query parameter - response_data = self.client.get( - f"/api/projects/{self.team.id}/groups/property_values/?key=industry&group_type_index=0" - ).json() - self.assertEqual(len(response_data), 3) - self.assertEqual( - response_data, - [ - {"name": "finance", "count": 1}, - {"name": "finance-technology", "count": 1}, - {"name": "technology", "count": 1}, - ], - ) - - # Test with query parameter - response_data = self.client.get( - f"/api/projects/{self.team.id}/groups/property_values/?key=industry&group_type_index=0&value=fin" - ).json() - self.assertEqual(len(response_data), 2) - self.assertEqual(response_data, [{"name": "finance", "count": 1}, {"name": "finance-technology", "count": 1}]) - - # Test with query parameter - case insensitive - response_data = self.client.get( - f"/api/projects/{self.team.id}/groups/property_values/?key=industry&group_type_index=0&value=TECH" - ).json() - self.assertEqual(len(response_data), 2) - self.assertEqual( - response_data, [{"name": "finance-technology", "count": 1}, {"name": "technology", "count": 1}] - ) - - # Test with query parameter - no matches - response_data = self.client.get( - f"/api/projects/{self.team.id}/groups/property_values/?key=industry&group_type_index=0&value=healthcare" - ).json() - self.assertEqual(len(response_data), 0) - self.assertEqual(response_data, []) - - # Test with query parameter - exact match - response_data = self.client.get( - f"/api/projects/{self.team.id}/groups/property_values/?key=industry&group_type_index=0&value=technology" - ).json() - self.assertEqual(len(response_data), 2) - self.assertEqual( - response_data, [{"name": "finance-technology", "count": 1}, {"name": "technology", "count": 1}] - ) - - # Test with different group_type_index - response_data = self.client.get( - f"/api/projects/{self.team.id}/groups/property_values/?key=industry&group_type_index=1&value=fin" - ).json() - self.assertEqual(len(response_data), 1) - self.assertEqual(response_data, [{"name": "finance", "count": 1}]) - - def test_empty_property_values(self): - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:5", - properties={"industry": "finance"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:6", - properties={"industry": "technology"}, - ) - create_group( - team_id=self.team.pk, - 
group_type_index=1, - group_key="org:1", - properties={"industry": "finance"}, - ) - response_data = self.client.get( - f"/api/projects/{self.team.id}/groups/property_values/?key=name&group_type_index=0" - ).json() - self.assertEqual(len(response_data), 0) - self.assertEqual(response_data, []) - - def test_update_groups_metadata(self): - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="organization", group_type_index=0 - ) - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="playlist", group_type_index=1 - ) - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="another", group_type_index=2 - ) - - response_data = self.client.patch( - f"/api/projects/{self.team.id}/groups_types/update_metadata", - [ - {"group_type_index": 0, "name_singular": "organization!"}, - { - "group_type_index": 1, - "group_type": "rename attempt", - "name_plural": "playlists", - }, - ], - ).json() - - self.assertEqual( - response_data, - [ - { - "group_type_index": 0, - "group_type": "organization", - "name_singular": "organization!", - "name_plural": None, - }, - { - "group_type_index": 1, - "group_type": "playlist", - "name_singular": None, - "name_plural": "playlists", - }, - { - "group_type_index": 2, - "group_type": "another", - "name_singular": None, - "name_plural": None, - }, - ], - ) - - def test_list_group_types(self): - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="organization", group_type_index=0 - ) - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="playlist", group_type_index=1 - ) - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="another", group_type_index=2 - ) - - response_data = self.client.get(f"/api/projects/{self.team.id}/groups_types").json() - - self.assertEqual( - response_data, - [ - { - "group_type_index": 0, - "group_type": "organization", - "name_singular": None, - "name_plural": None, - }, - { - "group_type_index": 1, - "group_type": "playlist", - "name_singular": None, - "name_plural": None, - }, - { - "group_type_index": 2, - "group_type": "another", - "name_singular": None, - "name_plural": None, - }, - ], - ) - - def test_cannot_list_group_types_of_another_org(self): - other_org = Organization.objects.create(name="other org") - other_team = Team.objects.create(organization=other_org, name="other project") - - GroupTypeMapping.objects.create( - team=other_team, project_id=other_team.project_id, group_type="organization", group_type_index=0 - ) - GroupTypeMapping.objects.create( - team=other_team, project_id=other_team.project_id, group_type="playlist", group_type_index=1 - ) - GroupTypeMapping.objects.create( - team=other_team, project_id=other_team.project_id, group_type="another", group_type_index=2 - ) - - response = self.client.get(f"/api/projects/{other_team.id}/groups_types") # No access to this project - - self.assertEqual(response.status_code, 403, response.json()) - self.assertEqual( - response.json(), - self.permission_denied_response("You don't have access to the project."), - ) - - def test_cannot_list_group_types_of_another_org_with_sharing_token(self): - sharing_configuration = SharingConfiguration.objects.create(team=self.team, enabled=True) - - other_org = Organization.objects.create(name="other org") - other_team = Team.objects.create(organization=other_org, name="other project") - - 
GroupTypeMapping.objects.create( - team=other_team, project_id=other_team.project_id, group_type="organization", group_type_index=0 - ) - GroupTypeMapping.objects.create( - team=other_team, project_id=other_team.project_id, group_type="playlist", group_type_index=1 - ) - GroupTypeMapping.objects.create( - team=other_team, project_id=other_team.project_id, group_type="another", group_type_index=2 - ) - - response = self.client.get( - f"/api/projects/{other_team.id}/groups_types/?sharing_access_token={sharing_configuration.access_token}" - ) - - self.assertEqual(response.status_code, 403, response.json()) - self.assertEqual( - response.json(), - self.permission_denied_response("You do not have permission to perform this action."), - ) - - def test_can_list_group_types_of_another_org_with_sharing_access_token(self): - other_org = Organization.objects.create(name="other org") - other_team = Team.objects.create(organization=other_org, name="other project") - sharing_configuration = SharingConfiguration.objects.create(team=other_team, enabled=True) - - GroupTypeMapping.objects.create( - team=other_team, project_id=other_team.project_id, group_type="organization", group_type_index=0 - ) - GroupTypeMapping.objects.create( - team=other_team, project_id=other_team.project_id, group_type="playlist", group_type_index=1 - ) - GroupTypeMapping.objects.create( - team=other_team, project_id=other_team.project_id, group_type="another", group_type_index=2 - ) - - disabled_response = self.client.get( - f"/api/projects/{other_team.id}/groups_types/?sharing_access_token={sharing_configuration.access_token}" - ).json() - - self.assertEqual( - disabled_response, - [ - { - "group_type_index": 0, - "group_type": "organization", - "name_singular": None, - "name_plural": None, - }, - { - "group_type_index": 1, - "group_type": "playlist", - "name_singular": None, - "name_plural": None, - }, - { - "group_type_index": 2, - "group_type": "another", - "name_singular": None, - "name_plural": None, - }, - ], - ) - - # Disable the config now - sharing_configuration.enabled = False - sharing_configuration.save() - - disabled_response = self.client.get( - f"/api/projects/{other_team.id}/groups_types?sharing_access_token={sharing_configuration.access_token}" - ) - - self.assertEqual(disabled_response.status_code, 403, disabled_response.json()) - self.assertEqual( - disabled_response.json(), - self.unauthenticated_response("Sharing access token is invalid.", "authentication_failed"), - ) - - def _create_related_groups_data(self): - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="organization", group_type_index=0 - ) - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="playlist", group_type_index=1 - ) - - uuid = UUID("01795392-cc00-0003-7dc7-67a694604d72") - - Person.objects.create(uuid=uuid, team_id=self.team.pk, distinct_ids=["1", "2"]) - Person.objects.create(team_id=self.team.pk, distinct_ids=["3"]) - Person.objects.create(team_id=self.team.pk, distinct_ids=["4"]) - - create_group(self.team.pk, 0, "0::0") - create_group(self.team.pk, 0, "0::1") - create_group(self.team.pk, 1, "1::2") - create_group(self.team.pk, 1, "1::3") - create_group(self.team.pk, 1, "1::4") - create_group(self.team.pk, 1, "1::5") - - _create_event( - event="$pageview", - team=self.team, - distinct_id="1", - timestamp="2021-05-05 00:00:00", - properties={"$group_0": "0::0", "$group_1": "1::2"}, - ) - - _create_event( - event="$pageview", - team=self.team, - 
distinct_id="1", - timestamp="2021-05-05 00:00:00", - properties={"$group_0": "0::0", "$group_1": "1::3"}, - ) - - _create_event( - event="$pageview", - team=self.team, - distinct_id="1", - timestamp="2021-05-05 00:00:00", - properties={"$group_0": "0::1", "$group_1": "1::3"}, - ) - - # Event too old, not counted - _create_event( - event="$pageview", - team=self.team, - distinct_id="1", - timestamp="2000-05-05 00:00:00", - properties={"$group_0": "0::0", "$group_1": "1::4"}, - ) - - # No such group exists in groups table - _create_event( - event="$pageview", - team=self.team, - distinct_id="1", - timestamp="2000-05-05 00:00:00", - properties={"$group_0": "0::0", "$group_1": "no such group"}, - ) - - return uuid diff --git a/ee/clickhouse/views/test/test_clickhouse_stickiness.py b/ee/clickhouse/views/test/test_clickhouse_stickiness.py deleted file mode 100644 index a2a58151db..0000000000 --- a/ee/clickhouse/views/test/test_clickhouse_stickiness.py +++ /dev/null @@ -1,212 +0,0 @@ -from datetime import datetime, timedelta - -from django.test.client import Client -from freezegun.api import freeze_time - -from ee.clickhouse.queries.stickiness import ClickhouseStickiness -from posthog.api.test.test_stickiness import ( - get_stickiness_time_series_ok, - stickiness_test_factory, -) -from posthog.models.action import Action -from posthog.models.filters.stickiness_filter import StickinessFilter -from posthog.models.group.util import create_group -from posthog.queries.util import get_earliest_timestamp -from posthog.test.base import ( - ClickhouseTestMixin, - _create_event, - _create_person, - snapshot_clickhouse_queries, -) -from posthog.test.test_journeys import journeys_for - - -def _create_action(**kwargs): - team = kwargs.pop("team") - name = kwargs.pop("name") - event_name = kwargs.pop("event_name") - action = Action.objects.create(team=team, name=name, steps_json=[{"event": event_name}]) - return action - - -def get_people_from_url_ok(client: Client, url: str): - response = client.get("/" + url) - assert response.status_code == 200, response.content - return response.json()["results"][0]["people"] - - -class TestClickhouseStickiness( - ClickhouseTestMixin, - stickiness_test_factory( - ClickhouseStickiness, - _create_event, - _create_person, - _create_action, - get_earliest_timestamp, - ), -): # type: ignore - @snapshot_clickhouse_queries - def test_filter_by_group_properties(self): - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key=f"org:1", - properties={"industry": "technology"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key=f"org:2", - properties={"industry": "agriculture"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key=f"org:3", - properties={"industry": "technology"}, - ) - create_group(team_id=self.team.pk, group_type_index=0, group_key=f"org:4", properties={}) - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key=f"company:1", - properties={"industry": "technology"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key=f"instance:1", - properties={}, - ) - - self._create_multiple_people( - period=timedelta(weeks=1), - event_properties=lambda i: { - "$group_0": f"org:{i}", - "$group_1": "instance:1", - }, - ) - - with freeze_time("2020-02-15T13:01:01Z"): - data = get_stickiness_time_series_ok( - client=self.client, - team=self.team, - request={ - "shown_as": "Stickiness", - "date_from": "2020-01-01", - "date_to": "2020-02-15", - "events": [{"id": "watched 
movie"}], - "properties": [ - { - "key": "industry", - "value": "technology", - "type": "group", - "group_type_index": 0, - } - ], - "interval": "week", - }, - ) - - assert data["watched movie"][1].value == 1 - assert data["watched movie"][2].value == 0 - assert data["watched movie"][3].value == 1 - - @snapshot_clickhouse_queries - def test_aggregate_by_groups(self): - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key=f"org:0", - properties={"industry": "technology"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key=f"org:1", - properties={"industry": "agriculture"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key=f"org:2", - properties={"industry": "technology"}, - ) - self._create_multiple_people( - period=timedelta(weeks=1), - event_properties=lambda i: {"$group_0": f"org:{i // 2}"}, - ) - - with freeze_time("2020-02-15T13:01:01Z"): - data = get_stickiness_time_series_ok( - client=self.client, - team=self.team, - request={ - "shown_as": "Stickiness", - "date_from": "2020-01-01", - "date_to": "2020-02-15", - "events": [ - { - "id": "watched movie", - "math": "unique_group", - "math_group_type_index": 0, - } - ], - "interval": "week", - }, - ) - - assert data["watched movie"][1].value == 2 - assert data["watched movie"][2].value == 0 - assert data["watched movie"][3].value == 1 - - @snapshot_clickhouse_queries - def test_timezones(self): - journeys_for( - { - "person1": [ - { - "event": "$pageview", - "timestamp": datetime(2021, 5, 2, 1), - }, # this time will fall on 5/1 in US Pacific - {"event": "$pageview", "timestamp": datetime(2021, 5, 2, 9)}, - {"event": "$pageview", "timestamp": datetime(2021, 5, 4, 3)}, - ] - }, - self.team, - ) - - data = ClickhouseStickiness().run( - filter=StickinessFilter( - data={ - "shown_as": "Stickiness", - "date_from": "2021-05-01", - "date_to": "2021-05-15", - "events": [{"id": "$pageview"}], - }, - team=self.team, - ), - team=self.team, - ) - - self.assertEqual(data[0]["days"], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) - self.assertEqual(data[0]["data"], [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) - - self.team.timezone = "US/Pacific" - self.team.save() - - data_pacific = ClickhouseStickiness().run( - filter=StickinessFilter( - data={ - "shown_as": "Stickiness", - "date_from": "2021-05-01", - "date_to": "2021-05-15", - "events": [{"id": "$pageview"}], - }, - team=self.team, - ), - team=self.team, - ) - - self.assertEqual(data_pacific[0]["days"], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) - self.assertEqual(data_pacific[0]["data"], [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) diff --git a/ee/clickhouse/views/test/test_clickhouse_trends.py b/ee/clickhouse/views/test/test_clickhouse_trends.py deleted file mode 100644 index dc31caa952..0000000000 --- a/ee/clickhouse/views/test/test_clickhouse_trends.py +++ /dev/null @@ -1,1380 +0,0 @@ -import json -from dataclasses import dataclass, field -from datetime import datetime -from typing import Any, Optional, Union -from unittest.case import skip -from unittest.mock import ANY - -import pytest -from django.core.cache import cache -from django.test import Client -from freezegun import freeze_time - -from ee.api.test.base import LicensedTestMixin -from posthog.api.test.test_cohort import create_cohort_ok -from posthog.api.test.test_event_definition import ( - create_organization, - create_team, - create_user, -) -from posthog.models.group.util import create_group -from posthog.models.group_type_mapping import 
GroupTypeMapping -from posthog.models.instance_setting import set_instance_setting -from posthog.models.team import Team -from posthog.test.base import ( - APIBaseTest, - ClickhouseTestMixin, - _create_person, - also_test_with_materialized_columns, - snapshot_clickhouse_queries, -) -from posthog.test.test_journeys import journeys_for, update_or_create_person - - -@pytest.mark.django_db -@pytest.mark.ee -def test_includes_only_intervals_within_range(client: Client): - """ - This is the case highlighted by https://github.com/PostHog/posthog/issues/2675 - - Here the issue is that we request, for instance, 14 days as the - date_from and display at weekly intervals, but previously we - were displaying 4 ticks on the date axis. If we were exactly on the - beginning of the week for two weeks then we'd want 2 ticks. - Otherwise we would have 3 ticks as the range would be intersecting - with three weeks. We should never need to display 4 ticks. - """ - organization = create_organization(name="test org") - team = create_team(organization=organization) - user = create_user("user", "pass", organization) - - client.force_login(user) - cache.clear() - - # I'm creating a cohort here so that I can use it as a breakdown, just because - # this is what was demonstrated in - # https://github.com/PostHog/posthog/issues/2675 but it might not be the - # simplest way to reproduce - - # "2021-09-19" is a Sunday, i.e. beginning of week - with freeze_time("2021-09-20T16:00:00"): - # First identify as a member of the cohort - distinct_id = "abc" - update_or_create_person( - distinct_ids=[distinct_id], - team_id=team.id, - properties={"cohort_identifier": 1}, - ) - cohort = create_cohort_ok( - client=client, - team_id=team.id, - name="test cohort", - groups=[{"properties": [{"key": "cohort_identifier", "value": 1, "type": "person"}]}], - ) - - journeys_for( - events_by_person={ - distinct_id: [ - {"event": "$pageview", "timestamp": "2021-09-04"}, - {"event": "$pageview", "timestamp": "2021-09-05"}, - {"event": "$pageview", "timestamp": "2021-09-12"}, - {"event": "$pageview", "timestamp": "2021-09-19"}, - ] - }, - team=team, - create_people=False, - ) - - trends = get_trends_ok( - client, - team=team, - request=TrendsRequestBreakdown( - date_from="-14days", - date_to="2021-09-21", - interval="week", - insight="TRENDS", - breakdown=json.dumps([cohort["id"]]), - breakdown_type="cohort", - display="ActionsLineGraph", - events=[ - { - "id": "$pageview", - "math": "dau", - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - "math_property": None, - } - ], - ), - ) - assert trends == trends | { - "is_cached": False, - "last_refresh": "2021-09-20T16:00:00Z", - "next": None, - "timezone": "UTC", - "result": [ - { - "action": ANY, - "breakdown_value": cohort["id"], - "label": "test cohort", - "count": 3.0, - "data": [1.0, 1.0, 1.0], - # Prior to the fix this would also include '29-Aug-2021' - "labels": ["5-Sep-2021", "12-Sep-2021", "19-Sep-2021"], - "days": ["2021-09-05", "2021-09-12", "2021-09-19"], - "filter": ANY, - } - ], - } - - -@pytest.mark.django_db -@pytest.mark.ee -def test_can_specify_number_of_smoothing_intervals(client: Client): - """ - The Smoothing feature should allow specifying a number of intervals over - which we will provide smoothing of the aggregated trend data. 
- """ - organization = create_organization(name="test org") - team = create_team(organization=organization) - user = create_user("user", "pass", organization) - - client.force_login(user) - - with freeze_time("2021-09-20T16:00:00"): - journeys_for( - events_by_person={ - "abc": [ - {"event": "$pageview", "timestamp": "2021-09-01"}, - {"event": "$pageview", "timestamp": "2021-09-01"}, - {"event": "$pageview", "timestamp": "2021-09-02"}, - {"event": "$pageview", "timestamp": "2021-09-03"}, - {"event": "$pageview", "timestamp": "2021-09-03"}, - {"event": "$pageview", "timestamp": "2021-09-03"}, - ] - }, - team=team, - ) - - interval_3_trend = get_trends_ok( - client, - team=team, - request=TrendsRequest( - date_from="2021-09-01", - date_to="2021-09-03", - interval="day", - insight="TRENDS", - display="ActionsLineGraph", - smoothing_intervals=3, - events=[ - { - "id": "$pageview", - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - } - ], - ), - ) - - assert interval_3_trend == interval_3_trend | { - "is_cached": False, - "last_refresh": "2021-09-20T16:00:00Z", - "next": None, - "timezone": "UTC", - "result": [ - { - "action": ANY, - "label": "$pageview", - "count": 5, - "data": [2.0, 1, 2.0], - "labels": ["1-Sep-2021", "2-Sep-2021", "3-Sep-2021"], - "days": ["2021-09-01", "2021-09-02", "2021-09-03"], - "filter": ANY, - } - ], - } - - interval_2_trend = get_trends_ok( - client, - team=team, - request=TrendsRequest( - date_from="2021-09-01", - date_to="2021-09-03", - interval="day", - insight="TRENDS", - display="ActionsLineGraph", - smoothing_intervals=2, - events=[ - { - "id": "$pageview", - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - } - ], - ), - ) - - assert interval_2_trend == interval_2_trend | { - "is_cached": False, - "last_refresh": "2021-09-20T16:00:00Z", - "next": None, - "timezone": "UTC", - "result": [ - { - "action": ANY, - "label": "$pageview", - "count": 5, - "data": [2.0, 1, 2.0], - "labels": ["1-Sep-2021", "2-Sep-2021", "3-Sep-2021"], - "days": ["2021-09-01", "2021-09-02", "2021-09-03"], - "filter": ANY, - } - ], - } - - interval_1_trend = get_trends_ok( - client, - team=team, - request=TrendsRequest( - date_from="2021-09-01", - date_to="2021-09-03", - interval="day", - insight="TRENDS", - display="ActionsLineGraph", - smoothing_intervals=1, - events=[ - { - "id": "$pageview", - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - } - ], - ), - ) - - assert interval_1_trend == interval_1_trend | { - "is_cached": False, - "last_refresh": "2021-09-20T16:00:00Z", - "next": None, - "timezone": "UTC", - "result": [ - { - "action": { - "id": "$pageview", - "type": "events", - "order": 0, - "name": "$pageview", - "custom_name": None, - "math": None, - "math_hogql": None, - "math_property": None, - "math_group_type_index": ANY, - "properties": {}, - "days": ["2021-09-01T00:00:00Z", "2021-09-02T00:00:00Z", "2021-09-03T00:00:00Z"], - }, - "label": "$pageview", - "count": 6.0, - "data": [2, 1, 3], - "labels": ["1-Sep-2021", "2-Sep-2021", "3-Sep-2021"], - "days": ["2021-09-01", "2021-09-02", "2021-09-03"], - "filter": ANY, - } - ], - } - - -@pytest.mark.django_db -@pytest.mark.ee -def test_smoothing_intervals_copes_with_null_values(client: Client): - """ - The Smoothing feature should allow specifying a number of intervals over - which we will provide smoothing of the aggregated trend data. 
- """ - organization = create_organization(name="test org") - team = create_team(organization=organization) - user = create_user("user", "pass", organization) - - client.force_login(user) - cache.clear() - - with freeze_time("2021-09-20T16:00:00"): - journeys_for( - events_by_person={ - "abc": [ - {"event": "$pageview", "timestamp": "2021-09-01"}, - {"event": "$pageview", "timestamp": "2021-09-01"}, - {"event": "$pageview", "timestamp": "2021-09-01"}, - # No events on 2 Sept - {"event": "$pageview", "timestamp": "2021-09-03"}, - {"event": "$pageview", "timestamp": "2021-09-03"}, - {"event": "$pageview", "timestamp": "2021-09-03"}, - ] - }, - team=team, - ) - - interval_3_trend = get_trends_ok( - client, - team=team, - request=TrendsRequest( - date_from="2021-09-01", - date_to="2021-09-03", - interval="day", - insight="TRENDS", - display="ActionsLineGraph", - smoothing_intervals=3, - events=[ - { - "id": "$pageview", - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - } - ], - ), - ) - - assert interval_3_trend == interval_3_trend | { - "is_cached": False, - "last_refresh": "2021-09-20T16:00:00Z", - "next": None, - "timezone": "UTC", - "result": [ - { - "action": ANY, - "label": "$pageview", - "count": 6.0, - "data": [3.0, 1.0, 2.0], - "labels": ["1-Sep-2021", "2-Sep-2021", "3-Sep-2021"], - "days": ["2021-09-01", "2021-09-02", "2021-09-03"], - "filter": ANY, - } - ], - } - - interval_1_trend = get_trends_ok( - client, - team=team, - request=TrendsRequest( - date_from="2021-09-01", - date_to="2021-09-03", - interval="day", - insight="TRENDS", - display="ActionsLineGraph", - smoothing_intervals=1, - events=[ - { - "id": "$pageview", - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - } - ], - ), - ) - - assert interval_1_trend == interval_1_trend | { - "is_cached": False, - "last_refresh": "2021-09-20T16:00:00Z", - "next": None, - "timezone": "UTC", - "result": [ - { - "action": ANY, - "label": "$pageview", - "count": 6.0, - "data": [3.0, 0.0, 3.0], - "labels": ["1-Sep-2021", "2-Sep-2021", "3-Sep-2021"], - "days": ["2021-09-01", "2021-09-02", "2021-09-03"], - "filter": ANY, - } - ], - } - - -@dataclass -class TrendsRequest: - date_from: Optional[str] = None - date_to: Optional[str] = None - interval: Optional[str] = None - insight: Optional[str] = None - display: Optional[str] = None - compare: Optional[bool] = None - events: list[dict[str, Any]] = field(default_factory=list) - properties: list[dict[str, Any]] = field(default_factory=list) - smoothing_intervals: Optional[int] = 1 - refresh: Optional[bool] = False - - -@dataclass -class TrendsRequestBreakdown(TrendsRequest): - breakdown: Optional[Union[list[int], str]] = None - breakdown_type: Optional[str] = None - - -def get_trends(client, request: Union[TrendsRequestBreakdown, TrendsRequest], team: Team): - data: dict[str, Any] = { - "date_from": request.date_from, - "date_to": request.date_to, - "interval": request.interval, - "insight": request.insight, - "display": request.display, - "compare": request.compare, - "events": json.dumps(request.events), - "properties": json.dumps(request.properties), - "smoothing_intervals": request.smoothing_intervals, - "refresh": request.refresh, - } - - if isinstance(request, TrendsRequestBreakdown): - data["breakdown"] = request.breakdown - data["breakdown_type"] = request.breakdown_type - - filtered_data = {k: v for k, v in data.items() if v is not None} - - return 
client.get(f"/api/projects/{team.id}/insights/trend/", data=filtered_data) - - -def get_trends_ok(client: Client, request: TrendsRequest, team: Team): - response = get_trends(client=client, request=request, team=team) - assert response.status_code == 200, response.content - return response.json() - - -@dataclass -class NormalizedTrendResult: - value: float - label: str - breakdown_value: Optional[Union[str, int]] - - -def get_trends_time_series_ok( - client: Client, request: TrendsRequest, team: Team, with_order: bool = False -) -> dict[str, dict[str, NormalizedTrendResult]]: - data = get_trends_ok(client=client, request=request, team=team) - res = {} - for item in data["result"]: - collect_dates = {} - for idx, date in enumerate(item["days"]): - collect_dates[date] = NormalizedTrendResult( - value=item["data"][idx], - label=item["labels"][idx], - breakdown_value=item.get("breakdown_value", None), - ) - suffix = " - {}".format(item["compare_label"]) if item.get("compare_label") else "" - if with_order: - suffix += " - {}".format(item["action"]["order"]) if item["action"].get("order") is not None else "" - res["{}{}".format(item["label"], suffix)] = collect_dates - - return res - - -def get_trends_aggregate_ok(client: Client, request: TrendsRequest, team: Team) -> dict[str, NormalizedTrendResult]: - data = get_trends_ok(client=client, request=request, team=team) - res = {} - for item in data["result"]: - res[item["label"]] = NormalizedTrendResult( - value=item["aggregated_value"], - label=item["action"]["name"], - breakdown_value=item.get("breakdown_value", None), - ) - - return res - - -class ClickhouseTestTrends(ClickhouseTestMixin, LicensedTestMixin, APIBaseTest): - maxDiff = None - CLASS_DATA_LEVEL_SETUP = False - - @snapshot_clickhouse_queries - def test_insight_trends_basic(self): - events_by_person = { - "1": [{"event": "$pageview", "timestamp": datetime(2012, 1, 14, 3)}], - "2": [{"event": "$pageview", "timestamp": datetime(2012, 1, 14, 3)}], - } - journeys_for(events_by_person, self.team) - - with freeze_time("2012-01-15T04:01:34.000Z"): - request = TrendsRequest( - date_from="-14d", - display="ActionsLineGraph", - events=[ - { - "id": "$pageview", - "math": "dau", - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - "math_property": None, - } - ], - ) - data = get_trends_time_series_ok(self.client, request, self.team) - - assert data["$pageview"]["2012-01-13"].value == 0 - assert data["$pageview"]["2012-01-14"].value == 2 - assert data["$pageview"]["2012-01-14"].label == "14-Jan-2012" - assert data["$pageview"]["2012-01-15"].value == 0 - - def test_insight_trends_entity_overlap(self): - events_by_person = { - "1": [ - { - "event": "$pageview", - "timestamp": datetime(2012, 1, 14, 3), - "properties": {"key": "val"}, - } - ], - "2": [{"event": "$pageview", "timestamp": datetime(2012, 1, 14, 3)}], - "3": [{"event": "$pageview", "timestamp": datetime(2012, 1, 14, 3)}], - } - journeys_for(events_by_person, self.team) - - with freeze_time("2012-01-15T04:01:34.000Z"): - request = TrendsRequest( - date_from="-14d", - display="ActionsLineGraph", - events=[ - { - "id": "$pageview", - "math": "dau", - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - "math_property": None, - }, - { - "id": "$pageview", - "math": "dau", - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 1, - "properties": [{"key": "key", "value": "val"}], - "math_property": None, - }, - ], - ) - data = 
get_trends_time_series_ok(self.client, request, self.team, with_order=True) - - assert data["$pageview - 0"]["2012-01-13"].value == 0 - assert data["$pageview - 0"]["2012-01-14"].value == 3 - assert data["$pageview - 1"]["2012-01-14"].value == 1 - assert data["$pageview - 0"]["2012-01-14"].label == "14-Jan-2012" - assert data["$pageview - 0"]["2012-01-15"].value == 0 - - @snapshot_clickhouse_queries - def test_insight_trends_aggregate(self): - events_by_person = { - "1": [{"event": "$pageview", "timestamp": datetime(2012, 1, 13, 3)}], - "2": [{"event": "$pageview", "timestamp": datetime(2012, 1, 14, 3)}], - } - journeys_for(events_by_person, self.team) - - with freeze_time("2012-01-15T04:01:34.000Z"): - request = TrendsRequest( - date_from="-14d", - display="ActionsPie", - events=[ - { - "id": "$pageview", - "math": None, - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - "math_property": None, - } - ], - ) - data = get_trends_aggregate_ok(self.client, request, self.team) - - assert data["$pageview"].value == 2 - assert data["$pageview"].label == "$pageview" - - @snapshot_clickhouse_queries - def test_insight_trends_cumulative(self): - _create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"key": "some_val"}) - _create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"key": "some_val"}) - _create_person(team_id=self.team.pk, distinct_ids=["p3"], properties={"key": "some_val"}) - - events_by_person = { - "p1": [ - { - "event": "$pageview", - "timestamp": datetime(2012, 1, 13, 3), - "properties": {"key": "val"}, - }, - { - "event": "$pageview", - "timestamp": datetime(2012, 1, 14, 3), - "properties": {"key": "val"}, - }, - ], - "p2": [ - { - "event": "$pageview", - "timestamp": datetime(2012, 1, 13, 3), - "properties": {"key": "notval"}, - } - ], - "p3": [ - { - "event": "$pageview", - "timestamp": datetime(2012, 1, 14, 3), - "properties": {"key": "val"}, - } - ], - } - journeys_for(events_by_person, self.team, create_people=False) - - # Total Volume - with freeze_time("2012-01-15T04:01:34.000Z"): - request = TrendsRequest( - date_from="-14d", - display="ActionsLineGraphCumulative", - events=[ - { - "id": "$pageview", - "math": None, - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - "math_property": None, - } - ], - ) - data_response = get_trends_time_series_ok(self.client, request, self.team) - - assert data_response["$pageview"]["2012-01-13"].value == 2 - assert data_response["$pageview"]["2012-01-14"].value == 4 - assert data_response["$pageview"]["2012-01-15"].value == 4 - assert data_response["$pageview"]["2012-01-14"].label == "14-Jan-2012" - - # DAU - - with freeze_time("2012-01-15T04:01:34.000Z"): - request = TrendsRequest( - date_from="-14d", - display="ActionsLineGraphCumulative", - events=[ - { - "id": "$pageview", - "math": "dau", - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - "math_property": None, - } - ], - ) - data_response = get_trends_time_series_ok(self.client, request, self.team) - - assert data_response["$pageview"]["2012-01-13"].value == 2 - assert data_response["$pageview"]["2012-01-14"].value == 3 - assert data_response["$pageview"]["2012-01-15"].value == 3 - assert data_response["$pageview"]["2012-01-14"].label == "14-Jan-2012" - - # breakdown - with freeze_time("2012-01-15T04:01:34.000Z"): - request = TrendsRequestBreakdown( - date_from="-14d", - display="ActionsLineGraphCumulative", 
- breakdown="key", - breakdown_type="event", - events=[ - { - "id": "$pageview", - "math": None, - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - "math_property": None, - } - ], - ) - data_response = get_trends_time_series_ok(self.client, request, self.team) - - assert data_response["val"]["2012-01-13"].value == 1 - assert data_response["val"]["2012-01-13"].breakdown_value == "val" - assert data_response["val"]["2012-01-14"].value == 3 - assert data_response["val"]["2012-01-14"].label == "14-Jan-2012" - - # breakdown wau - with freeze_time("2012-01-15T04:01:34.000Z"): - request = TrendsRequestBreakdown( - date_from="-14d", - display="ActionsLineGraphCumulative", - breakdown="key", - breakdown_type="event", - events=[ - { - "id": "$pageview", - "math": "weekly_active", - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [{"type": "person", "key": "key", "value": "some_val"}], - "math_property": None, - } - ], - properties=[{"type": "person", "key": "key", "value": "some_val"}], - ) - data_response = get_trends_time_series_ok(self.client, request, self.team) - - assert data_response["val"]["2012-01-13"].value == 1 - assert data_response["val"]["2012-01-13"].breakdown_value == "val" - assert data_response["val"]["2012-01-14"].value == 3 - assert data_response["val"]["2012-01-14"].label == "14-Jan-2012" - - # breakdown dau - with freeze_time("2012-01-15T04:01:34.000Z"): - request = TrendsRequestBreakdown( - date_from="-14d", - display="ActionsLineGraphCumulative", - breakdown="key", - breakdown_type="event", - events=[ - { - "id": "$pageview", - "math": "dau", - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - "math_property": None, - } - ], - ) - data_response = get_trends_time_series_ok(self.client, request, self.team) - - assert data_response["val"]["2012-01-13"].value == 1 - assert data_response["val"]["2012-01-13"].breakdown_value == "val" - assert data_response["val"]["2012-01-14"].value == 2 - assert data_response["val"]["2012-01-14"].label == "14-Jan-2012" - - @also_test_with_materialized_columns(["key"]) - def test_breakdown_with_filter(self): - events_by_person = { - "person1": [ - { - "event": "sign up", - "timestamp": datetime(2012, 1, 13, 3), - "properties": {"key": "val"}, - } - ], - "person2": [ - { - "event": "sign up", - "timestamp": datetime(2012, 1, 13, 3), - "properties": {"key": "oh"}, - } - ], - } - journeys_for(events_by_person, self.team) - - with freeze_time("2012-01-15T04:01:34.000Z"): - params = TrendsRequestBreakdown( - date_from="-14d", - breakdown="key", - events=[{"id": "sign up", "name": "sign up", "type": "events", "order": 0}], - properties=[{"key": "key", "value": "oh", "operator": "not_icontains"}], - ) - data_response = get_trends_time_series_ok(self.client, params, self.team) - - assert data_response["val"]["2012-01-13"].value == 1 - assert data_response["val"]["2012-01-13"].breakdown_value == "val" - - with freeze_time("2012-01-15T04:01:34.000Z"): - params = TrendsRequestBreakdown( - date_from="-14d", - breakdown="key", - display="ActionsPie", - events=[{"id": "sign up", "name": "sign up", "type": "events", "order": 0}], - ) - aggregate_response = get_trends_aggregate_ok(self.client, params, self.team) - - assert aggregate_response["val"].value == 1 - - def test_insight_trends_compare(self): - events_by_person = { - "p1": [ - { - "event": "$pageview", - "timestamp": datetime(2012, 1, 5, 3), - "properties": 
{"key": "val"}, - }, - { - "event": "$pageview", - "timestamp": datetime(2012, 1, 14, 3), - "properties": {"key": "val"}, - }, - ], - "p2": [ - { - "event": "$pageview", - "timestamp": datetime(2012, 1, 5, 3), - "properties": {"key": "notval"}, - }, - { - "event": "$pageview", - "timestamp": datetime(2012, 1, 14, 3), - "properties": {"key": "notval"}, - }, - ], - } - journeys_for(events_by_person, self.team) - - with freeze_time("2012-01-15T04:01:34.000Z"): - request = TrendsRequest( - date_from="-7d", - compare=True, - events=[ - { - "id": "$pageview", - "name": "$pageview", - "type": "events", - "order": 0, - } - ], - ) - data_response = get_trends_time_series_ok(self.client, request, self.team) - - assert data_response["$pageview - current"]["2012-01-13"].value == 0 - assert data_response["$pageview - current"]["2012-01-14"].value == 2 - - assert data_response["$pageview - previous"]["2012-01-04"].value == 0 - assert data_response["$pageview - previous"]["2012-01-05"].value == 2 - - -class ClickhouseTestTrendsGroups(ClickhouseTestMixin, LicensedTestMixin, APIBaseTest): - maxDiff = None - CLASS_DATA_LEVEL_SETUP = False - - def _create_groups(self): - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="organization", group_type_index=0 - ) - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type="company", group_type_index=1 - ) - - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:5", - properties={"industry": "finance"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:6", - properties={"industry": "technology"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=0, - group_key="org:7", - properties={"industry": "finance"}, - ) - create_group( - team_id=self.team.pk, - group_type_index=1, - group_key="company:10", - properties={"industry": "finance"}, - ) - - @snapshot_clickhouse_queries - def test_aggregating_by_group(self): - self._create_groups() - - events_by_person = { - "person1": [ - { - "event": "$pageview", - "timestamp": datetime(2020, 1, 2, 12), - "properties": {"$group_0": "org:5"}, - }, - { - "event": "$pageview", - "timestamp": datetime(2020, 1, 2, 12), - "properties": {"$group_0": "org:6"}, - }, - { - "event": "$pageview", - "timestamp": datetime(2020, 1, 2, 12), - "properties": {"$group_0": "org:6", "$group_1": "company:10"}, - }, - ] - } - journeys_for(events_by_person, self.team) - - request = TrendsRequest( - date_from="2020-01-01 00:00:00", - date_to="2020-01-12 00:00:00", - events=[ - { - "id": "$pageview", - "type": "events", - "order": 0, - "math": "unique_group", - "math_group_type_index": 0, - } - ], - ) - data_response = get_trends_time_series_ok(self.client, request, self.team) - - assert data_response["$pageview"]["2020-01-01"].value == 0 - assert data_response["$pageview"]["2020-01-02"].value == 2 - - @snapshot_clickhouse_queries - def test_aggregating_by_session(self): - events_by_person = { - "person1": [ - { - "event": "$pageview", - "timestamp": datetime(2020, 1, 1, 12), - "properties": {"$session_id": "1"}, - }, - { - "event": "$pageview", - "timestamp": datetime(2020, 1, 1, 12), - "properties": {"$session_id": "1"}, - }, - { - "event": "$pageview", - "timestamp": datetime(2020, 1, 2, 12), - "properties": {"$session_id": "2"}, - }, - ], - "person2": [ - { - "event": "$pageview", - "timestamp": datetime(2020, 1, 2, 12), - "properties": {"$session_id": "3"}, - } - ], - } - 
journeys_for(events_by_person, self.team) - - request = TrendsRequest( - date_from="2020-01-01 00:00:00", - date_to="2020-01-12 00:00:00", - events=[ - { - "id": "$pageview", - "type": "events", - "order": 0, - "math": "unique_session", - } - ], - ) - data_response = get_trends_time_series_ok(self.client, request, self.team) - - assert data_response["$pageview"]["2020-01-01"].value == 1 - assert data_response["$pageview"]["2020-01-02"].value == 2 - - -class ClickhouseTestTrendsCaching(ClickhouseTestMixin, LicensedTestMixin, APIBaseTest): - maxDiff = None - CLASS_DATA_LEVEL_SETUP = False - - @snapshot_clickhouse_queries - def test_insight_trends_merging(self): - set_instance_setting("STRICT_CACHING_TEAMS", "all") - - events_by_person = { - "1": [{"event": "$pageview", "timestamp": datetime(2012, 1, 13, 3)}], - "2": [{"event": "$pageview", "timestamp": datetime(2012, 1, 13, 3)}], - } - journeys_for(events_by_person, self.team) - - with freeze_time("2012-01-15T04:01:34.000Z"): - request = TrendsRequest( - date_from="-14d", - display="ActionsLineGraph", - events=[ - { - "id": "$pageview", - "math": "dau", - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - "math_property": None, - } - ], - ) - data = get_trends_time_series_ok(self.client, request, self.team) - - assert data["$pageview"]["2012-01-13"].value == 2 - assert data["$pageview"]["2012-01-14"].value == 0 - assert data["$pageview"]["2012-01-15"].value == 0 - - events_by_person = {"1": [{"event": "$pageview", "timestamp": datetime(2012, 1, 15, 3)}]} - journeys_for(events_by_person, self.team) - - with freeze_time("2012-01-15T04:01:34.000Z"): - request = TrendsRequest( - date_from="-14d", - display="ActionsLineGraph", - events=[ - { - "id": "$pageview", - "math": "dau", - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - "math_property": None, - } - ], - refresh=True, - ) - data = get_trends_time_series_ok(self.client, request, self.team) - - assert data["$pageview"]["2012-01-13"].value == 2 - assert data["$pageview"]["2012-01-14"].value == 0 - assert data["$pageview"]["2012-01-15"].value == 1 - - @skip("Don't handle breakdowns right now") - def test_insight_trends_merging_breakdown(self): - set_instance_setting("STRICT_CACHING_TEAMS", "all") - - events_by_person = { - "1": [ - { - "event": "$action", - "timestamp": datetime(2012, 1, 13, 3), - "properties": {"key": "1"}, - }, - { - "event": "$action", - "timestamp": datetime(2012, 1, 13, 3), - "properties": {"key": "2"}, - }, - ], - "2": [ - { - "event": "$action", - "timestamp": datetime(2012, 1, 13, 3), - "properties": {"key": "1"}, - } - ], - } - journeys_for(events_by_person, self.team) - - with freeze_time("2012-01-15T04:01:34.000Z"): - request = TrendsRequestBreakdown( - date_from="-14d", - display="ActionsLineGraph", - events=[ - { - "id": "$action", - "math": "dau", - "name": "$action", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - "math_property": None, - } - ], - breakdown="key", - ) - data = get_trends_time_series_ok(self.client, request, self.team) - - assert data["$action - 1"]["2012-01-13"].value == 2 - assert data["$action - 1"]["2012-01-14"].value == 0 - assert data["$action - 1"]["2012-01-15"].value == 0 - - assert data["$action - 2"]["2012-01-13"].value == 1 - assert data["$action - 2"]["2012-01-14"].value == 0 - assert data["$action - 2"]["2012-01-15"].value == 0 - - events_by_person = { - "1": [ - { - "event": "$action", - "timestamp": 
datetime(2012, 1, 15, 3), - "properties": {"key": "2"}, - } - ], - "2": [ - { - "event": "$action", - "timestamp": datetime(2012, 1, 13, 3), - "properties": {"key": "2"}, - } - ], # this won't be counted - } - journeys_for(events_by_person, self.team) - - with freeze_time("2012-01-15T04:01:34.000Z"): - request = TrendsRequestBreakdown( - date_from="-14d", - display="ActionsLineGraph", - events=[ - { - "id": "$action", - "math": "dau", - "name": "$action", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - "math_property": None, - } - ], - breakdown="key", - refresh=True, - ) - data = get_trends_time_series_ok(self.client, request, self.team) - - assert data["$action - 1"]["2012-01-13"].value == 2 - assert data["$action - 1"]["2012-01-14"].value == 0 - assert data["$action - 1"]["2012-01-15"].value == 0 - - assert data["$action - 2"]["2012-01-13"].value == 1 - assert data["$action - 2"]["2012-01-14"].value == 0 - assert data["$action - 2"]["2012-01-15"].value == 1 - - @skip("Don't handle breakdowns right now") - def test_insight_trends_merging_breakdown_multiple(self): - set_instance_setting("STRICT_CACHING_TEAMS", "all") - - events_by_person = { - "1": [ - { - "event": "$pageview", - "timestamp": datetime(2012, 1, 13, 3), - "properties": {"key": "1"}, - }, - { - "event": "$action", - "timestamp": datetime(2012, 1, 13, 3), - "properties": {"key": "1"}, - }, - { - "event": "$action", - "timestamp": datetime(2012, 1, 13, 3), - "properties": {"key": "2"}, - }, - ], - "2": [ - { - "event": "$pageview", - "timestamp": datetime(2012, 1, 13, 3), - "properties": {"key": "1"}, - }, - { - "event": "$action", - "timestamp": datetime(2012, 1, 13, 3), - "properties": {"key": "1"}, - }, - ], - } - journeys_for(events_by_person, self.team) - - with freeze_time("2012-01-15T04:01:34.000Z"): - request = TrendsRequestBreakdown( - date_from="-14d", - display="ActionsLineGraph", - events=[ - { - "id": "$pageview", - "math": "dau", - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - "math_property": None, - }, - { - "id": "$action", - "math": "dau", - "name": "$action", - "custom_name": None, - "type": "events", - "order": 1, - "properties": [], - "math_property": None, - }, - ], - breakdown="key", - ) - data = get_trends_time_series_ok(self.client, request, self.team) - - assert data["$pageview - 1"]["2012-01-13"].value == 2 - assert data["$pageview - 1"]["2012-01-14"].value == 0 - assert data["$pageview - 1"]["2012-01-15"].value == 0 - - assert data["$action - 1"]["2012-01-13"].value == 2 - assert data["$action - 1"]["2012-01-14"].value == 0 - assert data["$action - 1"]["2012-01-15"].value == 0 - - assert data["$action - 2"]["2012-01-13"].value == 1 - assert data["$action - 2"]["2012-01-14"].value == 0 - assert data["$action - 2"]["2012-01-15"].value == 0 - - events_by_person = { - "1": [ - { - "event": "$pageview", - "timestamp": datetime(2012, 1, 15, 3), - "properties": {"key": "1"}, - }, - { - "event": "$action", - "timestamp": datetime(2012, 1, 15, 3), - "properties": {"key": "2"}, - }, - ], - "2": [ - { - "event": "$action", - "timestamp": datetime(2012, 1, 13, 3), - "properties": {"key": "2"}, - } # this won't be counted - ], - } - journeys_for(events_by_person, self.team) - - with freeze_time("2012-01-15T04:01:34.000Z"): - request = TrendsRequestBreakdown( - date_from="-14d", - display="ActionsLineGraph", - events=[ - { - "id": "$pageview", - "math": "dau", - "name": "$pageview", - "custom_name": None, - "type": "events", - 
"order": 0, - "properties": [], - "math_property": None, - }, - { - "id": "$action", - "math": "dau", - "name": "$action", - "custom_name": None, - "type": "events", - "order": 1, - "properties": [], - "math_property": None, - }, - ], - breakdown="key", - refresh=True, - ) - data = get_trends_time_series_ok(self.client, request, self.team) - - assert data["$pageview - 1"]["2012-01-13"].value == 2 - assert data["$pageview - 1"]["2012-01-14"].value == 0 - assert data["$pageview - 1"]["2012-01-15"].value == 1 - - assert data["$action - 1"]["2012-01-13"].value == 2 - assert data["$action - 1"]["2012-01-14"].value == 0 - assert data["$action - 1"]["2012-01-15"].value == 0 - - assert data["$action - 2"]["2012-01-13"].value == 1 - assert data["$action - 2"]["2012-01-14"].value == 0 - assert data["$action - 2"]["2012-01-15"].value == 1 - - # When the latest time interval in the cached result doesn't match the current interval, do not use caching pattern - @snapshot_clickhouse_queries - def test_insight_trends_merging_skipped_interval(self): - set_instance_setting("STRICT_CACHING_TEAMS", "all") - - events_by_person = { - "1": [{"event": "$pageview", "timestamp": datetime(2012, 1, 13, 3)}], - "2": [{"event": "$pageview", "timestamp": datetime(2012, 1, 13, 3)}], - } - journeys_for(events_by_person, self.team) - - with freeze_time("2012-01-14T04:01:34.000Z"): - request = TrendsRequest( - date_from="-14d", - display="ActionsLineGraph", - events=[ - { - "id": "$pageview", - "math": "dau", - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - "math_property": None, - } - ], - ) - data = get_trends_time_series_ok(self.client, request, self.team) - - assert data["$pageview"]["2012-01-13"].value == 2 - assert data["$pageview"]["2012-01-14"].value == 0 - - events_by_person = { - "1": [{"event": "$pageview", "timestamp": datetime(2012, 1, 15, 3)}], - "2": [{"event": "$pageview", "timestamp": datetime(2012, 1, 16, 3)}], - } - journeys_for(events_by_person, self.team) - - with freeze_time("2012-01-16T04:01:34.000Z"): - request = TrendsRequest( - date_from="-14d", - display="ActionsLineGraph", - events=[ - { - "id": "$pageview", - "math": "dau", - "name": "$pageview", - "custom_name": None, - "type": "events", - "order": 0, - "properties": [], - "math_property": None, - } - ], - refresh=True, - ) - data = get_trends_time_series_ok(self.client, request, self.team) - - assert data["$pageview"]["2012-01-13"].value == 2 - assert data["$pageview"]["2012-01-14"].value == 0 - assert data["$pageview"]["2012-01-15"].value == 1 - assert data["$pageview"]["2012-01-16"].value == 1 diff --git a/ee/clickhouse/views/test/test_experiment_holdouts.py b/ee/clickhouse/views/test/test_experiment_holdouts.py deleted file mode 100644 index 4d067d1483..0000000000 --- a/ee/clickhouse/views/test/test_experiment_holdouts.py +++ /dev/null @@ -1,145 +0,0 @@ -from rest_framework import status - -from ee.api.test.base import APILicensedTest -from posthog.models.experiment import Experiment -from posthog.models.feature_flag import FeatureFlag - - -class TestExperimentHoldoutCRUD(APILicensedTest): - def test_can_list_experiment_holdouts(self): - response = self.client.get(f"/api/projects/{self.team.id}/experiment_holdouts/") - self.assertEqual(response.status_code, status.HTTP_200_OK) - - def test_create_update_experiment_holdouts(self) -> None: - response = self.client.post( - f"/api/projects/{self.team.id}/experiment_holdouts/", - data={ - "name": "Test Experiment holdout", - "filters": [ - { - 
"properties": [], - "rollout_percentage": 20, - "variant": "holdout", - } - ], - }, - format="json", - ) - - holdout_id = response.json()["id"] - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment holdout") - self.assertEqual( - response.json()["filters"], - [{"properties": [], "rollout_percentage": 20, "variant": f"holdout-{holdout_id}"}], - ) - - # Generate experiment to be part of holdout - ff_key = "a-b-tests" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2021-12-01T10:23", - "end_date": None, - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - "holdout_id": holdout_id, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment") - self.assertEqual(response.json()["feature_flag_key"], ff_key) - - created_ff = FeatureFlag.objects.get(key=ff_key) - - self.assertEqual(created_ff.key, ff_key) - self.assertEqual(created_ff.filters["multivariate"]["variants"][0]["key"], "control") - self.assertEqual(created_ff.filters["multivariate"]["variants"][1]["key"], "test") - self.assertEqual(created_ff.filters["groups"][0]["properties"], []) - self.assertEqual( - created_ff.filters["holdout_groups"], - [{"properties": [], "rollout_percentage": 20, "variant": f"holdout-{holdout_id}"}], - ) - - exp_id = response.json()["id"] - # Now try updating holdout - response = self.client.patch( - f"/api/projects/{self.team.id}/experiment_holdouts/{holdout_id}", - { - "name": "Test Experiment holdout 2", - "filters": [ - { - "properties": [], - "rollout_percentage": 30, - "variant": "holdout", - } - ], - }, - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.json()["name"], "Test Experiment holdout 2") - self.assertEqual( - response.json()["filters"], - [{"properties": [], "rollout_percentage": 30, "variant": f"holdout-{holdout_id}"}], - ) - - # make sure flag for experiment in question was updated as well - created_ff = FeatureFlag.objects.get(key=ff_key) - self.assertEqual( - created_ff.filters["holdout_groups"], - [{"properties": [], "rollout_percentage": 30, "variant": f"holdout-{holdout_id}"}], - ) - - # now delete holdout - response = self.client.delete(f"/api/projects/{self.team.id}/experiment_holdouts/{holdout_id}") - self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) - - # make sure flag for experiment in question was updated as well - created_ff = FeatureFlag.objects.get(key=ff_key) - self.assertEqual(created_ff.filters["holdout_groups"], None) - - # and same for experiment - exp = Experiment.objects.get(pk=exp_id) - self.assertEqual(exp.holdout, None) - - def test_invalid_create(self): - response = self.client.post( - f"/api/projects/{self.team.id}/experiment_holdouts/", - data={ - "name": None, # invalid - "filters": [ - { - "properties": [], - "rollout_percentage": 20, - "variant": "holdout", - } - ], - }, - format="json", - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["detail"], "This field may not be null.") - - response = self.client.post( - f"/api/projects/{self.team.id}/experiment_holdouts/", - data={ - "name": "xyz", - "filters": [], - }, - format="json", - ) - - 
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["detail"], "Filters are required to create an holdout group") diff --git a/ee/clickhouse/views/test/test_experiment_saved_metrics.py b/ee/clickhouse/views/test/test_experiment_saved_metrics.py deleted file mode 100644 index 90575cbba0..0000000000 --- a/ee/clickhouse/views/test/test_experiment_saved_metrics.py +++ /dev/null @@ -1,240 +0,0 @@ -from rest_framework import status - -from ee.api.test.base import APILicensedTest -from posthog.models.experiment import Experiment, ExperimentToSavedMetric - - -class TestExperimentSavedMetricsCRUD(APILicensedTest): - def test_can_list_experiment_saved_metrics(self): - response = self.client.get(f"/api/projects/{self.team.id}/experiment_saved_metrics/") - self.assertEqual(response.status_code, status.HTTP_200_OK) - - def test_validation_of_query_metric(self): - response = self.client.post( - f"/api/projects/{self.team.id}/experiment_saved_metrics/", - data={ - "name": "Test Experiment saved metric", - "description": "Test description", - "query": {}, - }, - format="json", - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["detail"], "Query is required to create a saved metric") - - response = self.client.post( - f"/api/projects/{self.team.id}/experiment_saved_metrics/", - data={ - "name": "Test Experiment saved metric", - "description": "Test description", - "query": {"not-kind": "ExperimentTrendsQuery"}, - }, - format="json", - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual( - response.json()["detail"], "Metric query kind must be 'ExperimentTrendsQuery' or 'ExperimentFunnelsQuery'" - ) - - response = self.client.post( - f"/api/projects/{self.team.id}/experiment_saved_metrics/", - data={ - "name": "Test Experiment saved metric", - "description": "Test description", - "query": {"kind": "not-ExperimentTrendsQuery"}, - }, - format="json", - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual( - response.json()["detail"], "Metric query kind must be 'ExperimentTrendsQuery' or 'ExperimentFunnelsQuery'" - ) - - response = self.client.post( - f"/api/projects/{self.team.id}/experiment_saved_metrics/", - data={ - "name": "Test Experiment saved metric", - "description": "Test description", - "query": {"kind": "TrendsQuery"}, - }, - format="json", - ) - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual( - response.json()["detail"], "Metric query kind must be 'ExperimentTrendsQuery' or 'ExperimentFunnelsQuery'" - ) - - response = self.client.post( - f"/api/projects/{self.team.id}/experiment_saved_metrics/", - data={ - "name": "Test Experiment saved metric", - "description": "Test description", - "query": {"kind": "ExperimentTrendsQuery"}, - }, - format="json", - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertTrue("'loc': ('count_query',), 'msg': 'Field required'" in response.json()["detail"]) - - response = self.client.post( - f"/api/projects/{self.team.id}/experiment_saved_metrics/", - data={ - "name": "Test Experiment saved metric", - "description": "Test description", - "query": { - "kind": "ExperimentTrendsQuery", - "count_query": {"kind": "TrendsQuery", "series": [{"kind": "EventsNode", "event": "$pageview"}]}, - }, - }, - format="json", - ) - - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - - def 
test_create_update_experiment_saved_metrics(self) -> None: - response = self.client.post( - f"/api/projects/{self.team.id}/experiment_saved_metrics/", - data={ - "name": "Test Experiment saved metric", - "description": "Test description", - "query": { - "kind": "ExperimentTrendsQuery", - "count_query": { - "kind": "TrendsQuery", - "series": [{"kind": "EventsNode", "event": "$pageview"}], - }, - }, - }, - format="json", - ) - - saved_metric_id = response.json()["id"] - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - self.assertEqual(response.json()["name"], "Test Experiment saved metric") - self.assertEqual(response.json()["description"], "Test description") - self.assertEqual( - response.json()["query"], - { - "kind": "ExperimentTrendsQuery", - "count_query": {"kind": "TrendsQuery", "series": [{"kind": "EventsNode", "event": "$pageview"}]}, - }, - ) - self.assertEqual(response.json()["created_by"]["id"], self.user.pk) - - # Generate experiment to have saved metric - ff_key = "a-b-tests" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "Test Experiment", - "description": "", - "start_date": "2021-12-01T10:23", - "end_date": None, - "feature_flag_key": ff_key, - "parameters": None, - "filters": { - "events": [ - {"order": 0, "id": "$pageview"}, - {"order": 1, "id": "$pageleave"}, - ], - "properties": [], - }, - "saved_metrics_ids": [{"id": saved_metric_id, "metadata": {"type": "secondary"}}], - }, - ) - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - - exp_id = response.json()["id"] - - self.assertEqual(response.json()["name"], "Test Experiment") - self.assertEqual(response.json()["feature_flag_key"], ff_key) - - self.assertEqual(Experiment.objects.get(pk=exp_id).saved_metrics.count(), 1) - experiment_to_saved_metric = Experiment.objects.get(pk=exp_id).experimenttosavedmetric_set.first() - self.assertEqual(experiment_to_saved_metric.metadata, {"type": "secondary"}) - saved_metric = Experiment.objects.get(pk=exp_id).saved_metrics.first() - self.assertEqual(saved_metric.id, saved_metric_id) - self.assertEqual( - saved_metric.query, - { - "kind": "ExperimentTrendsQuery", - "count_query": {"kind": "TrendsQuery", "series": [{"kind": "EventsNode", "event": "$pageview"}]}, - }, - ) - - # Now try updating saved metric - response = self.client.patch( - f"/api/projects/{self.team.id}/experiment_saved_metrics/{saved_metric_id}", - { - "name": "Test Experiment saved metric 2", - "description": "Test description 2", - "query": { - "kind": "ExperimentTrendsQuery", - "count_query": {"kind": "TrendsQuery", "series": [{"kind": "EventsNode", "event": "$pageleave"}]}, - }, - }, - ) - - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.json()["name"], "Test Experiment saved metric 2") - self.assertEqual( - response.json()["query"], - { - "kind": "ExperimentTrendsQuery", - "count_query": {"kind": "TrendsQuery", "series": [{"kind": "EventsNode", "event": "$pageleave"}]}, - }, - ) - - # make sure experiment in question was updated as well - self.assertEqual(Experiment.objects.get(pk=exp_id).saved_metrics.count(), 1) - saved_metric = Experiment.objects.get(pk=exp_id).saved_metrics.first() - self.assertEqual(saved_metric.id, saved_metric_id) - self.assertEqual( - saved_metric.query, - { - "kind": "ExperimentTrendsQuery", - "count_query": {"kind": "TrendsQuery", "series": [{"kind": "EventsNode", "event": "$pageleave"}]}, - }, - ) - self.assertEqual(saved_metric.name, "Test Experiment saved metric 2") - 
self.assertEqual(saved_metric.description, "Test description 2") - - # now delete saved metric - response = self.client.delete(f"/api/projects/{self.team.id}/experiment_saved_metrics/{saved_metric_id}") - self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT) - - # make sure experiment in question was updated as well - self.assertEqual(Experiment.objects.get(pk=exp_id).saved_metrics.count(), 0) - self.assertEqual(ExperimentToSavedMetric.objects.filter(experiment_id=exp_id).count(), 0) - - def test_invalid_create(self): - response = self.client.post( - f"/api/projects/{self.team.id}/experiment_saved_metrics/", - data={ - "name": None, # invalid - "query": { - "kind": "ExperimentTrendsQuery", - "count_query": {"kind": "TrendsQuery", "series": [{"kind": "EventsNode", "event": "$pageview"}]}, - }, - }, - format="json", - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["detail"], "This field may not be null.") - - response = self.client.post( - f"/api/projects/{self.team.id}/experiment_saved_metrics/", - data={ - "name": "xyz", - "query": {}, - }, - format="json", - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["detail"], "Query is required to create a saved metric") diff --git a/ee/conftest.py b/ee/conftest.py deleted file mode 100644 index 0de792f0d3..0000000000 --- a/ee/conftest.py +++ /dev/null @@ -1,2 +0,0 @@ -# flake8: noqa -from posthog.conftest import * diff --git a/ee/frontend/exports.ts b/ee/frontend/exports.ts deleted file mode 100644 index d973463019..0000000000 --- a/ee/frontend/exports.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { PostHogEE } from '@posthog/ee/types' - -import { transformEventToWeb, transformToWeb } from './mobile-replay' - -export default async (): Promise<PostHogEE> => - Promise.resolve({ - enabled: true, - mobileReplay: { - transformEventToWeb, - transformToWeb, - }, - }) diff --git a/ee/frontend/mobile-replay/__mocks__/encoded-snapshot-data.ts b/ee/frontend/mobile-replay/__mocks__/encoded-snapshot-data.ts deleted file mode 100644 index ceb176d49c..0000000000 --- a/ee/frontend/mobile-replay/__mocks__/encoded-snapshot-data.ts +++ /dev/null @@ -1,6 +0,0 @@ -export const encodedWebSnapshotData: string[] = [ - // first item could be a network event or something else - '{"windowId":"0191C63B-03FF-73B5-96BE-40BE2761621C","data":{"payload":{"requests":[{"duration":28,"entryType":"resource","initiatorType":"fetch","method":"GET","name":"https://1.bp.blogspot.com/-hkNkoCjc5UA/T4JTlCjhhfI/AAAAAAAAB98/XxQwZ-QPkI8/s1600/Free+Google+Wallpapers+3.jpg","responseStatus":200,"timestamp":1725369200216,"transferSize":82375}]},"plugin":"rrweb/network@1"},"timestamp":1725369200216,"type":6,"seen":8833798676917222}', - '{"windowId":"0191C63B-03FF-73B5-96BE-40BE2761621C","data":{"height":852,"width":393},"timestamp":1725607643113,"type":4,"seen":4930607506458337}', - '{"windowId":"0191C63B-03FF-73B5-96BE-40BE2761621C","data":{"initialOffset":{"left":0,"top":0},"wireframes":[{"base64":"data:image/jpeg;base64,/9j/4AAQSkZJR","height":852,"id":4324378400,"type":"screenshot","width":393,"x":0,"y":0}]},"timestamp":1725607643113,"type":2,"seen":2118469619185818}', -] diff --git a/ee/frontend/mobile-replay/__mocks__/increment-with-child-duplication.json b/ee/frontend/mobile-replay/__mocks__/increment-with-child-duplication.json deleted file mode 100644 index 7ffc2e5f38..0000000000 --- a/ee/frontend/mobile-replay/__mocks__/increment-with-child-duplication.json 
+++ /dev/null @@ -1,217 +0,0 @@ -{ - "data": { - "adds": [ - { - "parentId": 183891344, - "wireframe": { - "childWireframes": [ - { - "childWireframes": [ - { - "disabled": false, - "height": 19, - "id": 52129787, - "style": { - "color": "#000000", - "fontFamily": "sans-serif", - "fontSize": 14, - "horizontalAlign": "left", - "paddingBottom": 0, - "paddingLeft": 0, - "paddingRight": 0, - "paddingTop": 0, - "verticalAlign": "top" - }, - "text": "PostHog/posthog-ios", - "type": "text", - "width": 368, - "x": 66, - "y": 556 - }, - { - "disabled": false, - "height": 19, - "id": 99571736, - "style": { - "color": "#000000", - "fontFamily": "sans-serif", - "fontSize": 14, - "horizontalAlign": "left", - "paddingBottom": 0, - "paddingLeft": 0, - "paddingRight": 0, - "paddingTop": 0, - "verticalAlign": "top" - }, - "text": "PostHog iOS integration", - "type": "text", - "width": 150, - "x": 10, - "y": 584 - }, - { - "disabled": false, - "height": 32, - "id": 240124529, - "style": { - "color": "#000000", - "fontFamily": "sans-serif", - "fontSize": 14, - "horizontalAlign": "center", - "paddingBottom": 6, - "paddingLeft": 32, - "paddingRight": 0, - "paddingTop": 6, - "verticalAlign": "center" - }, - "text": "20", - "type": "text", - "width": 48, - "x": 10, - "y": 548 - } - ], - "disabled": false, - "height": 62, - "id": 209272202, - "style": {}, - "width": 406, - "x": 2, - "y": 540 - } - ], - "disabled": false, - "height": 70, - "id": 142908405, - "style": { - "backgroundImage": "iVBORw0KGgoAAAANSUhEUgAABDgAAAC5CAYAAADNs4/hAAAAAXNSR0IArs4c6QAAAARzQklUCAgI\nCHwIZIgAAAWeSURBVHic7dyxqh1lGIbR77cIBuzTWKUShDSChVh7ITaWFoH0Emsr78D7sBVCBFOZ\nQMo0XoAEEshvZeeZ8cR9PDywVjsfm7d+mNkzAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAADA7Vn/5mjv/fHMfDszX83MgxtdBAAAADDzbGZ+npkf1lqvzo5PA8fe++uZ+XFm\n7v73bQAAAADX8npmvllr/XR0dBg49t73Z+b3mblzwWEAAAAA1/FmZj5da7286uCDkx94POIGAAAA\ncLvuzMx3RwdngeOzy20BAAAAeG+HjeLKT1T23h/OzJ9zHkEAAAAAbtq7mflorfX6nx6e/QfHvpFJ\nAAAAANe01rqyY3g7AwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgT\nOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACA\nPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAA\nAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgA\nAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyB\nAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADI\nEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAA\ngDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMA\nAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4\nAAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8\ngQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAA\nyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAA\nAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIED\nAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgT\nOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACA\nPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAA\nAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgA\nAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIOwscL/6XFQAAAADHDhvFWeD47YJD\nAAAAAN7XYaNYRw/33p/PzC/jUxYAAADg9rydmS/WWk+vOjgMF2utJzPz+NKrAAAAAK7h0VHcmDl5\ng+Nve+8vZ+b7mflkZu5dYBgAAADAkT9m5vnMPFxr/XrbYwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\nAAAAAAAAAAAAAAAAAAAAAAAAgCN/AW0xMqHn
NQceAAAAAElFTkSuQmCC\n" - }, - "width": 411, - "x": 0, - "y": 536 - } - }, - { - "parentId": 142908405, - "wireframe": { - "childWireframes": [ - { - "disabled": false, - "height": 19, - "id": 52129787, - "style": { - "color": "#000000", - "fontFamily": "sans-serif", - "fontSize": 14, - "horizontalAlign": "left", - "paddingBottom": 0, - "paddingLeft": 0, - "paddingRight": 0, - "paddingTop": 0, - "verticalAlign": "top" - }, - "text": "PostHog/posthog-ios", - "type": "text", - "width": 368, - "x": 66, - "y": 556 - }, - { - "disabled": false, - "height": 19, - "id": 99571736, - "style": { - "color": "#000000", - "fontFamily": "sans-serif", - "fontSize": 14, - "horizontalAlign": "left", - "paddingBottom": 0, - "paddingLeft": 0, - "paddingRight": 0, - "paddingTop": 0, - "verticalAlign": "top" - }, - "text": "PostHog iOS integration", - "type": "text", - "width": 150, - "x": 10, - "y": 584 - }, - { - "disabled": false, - "height": 32, - "id": 240124529, - "style": { - "color": "#000000", - "fontFamily": "sans-serif", - "fontSize": 14, - "horizontalAlign": "center", - "paddingBottom": 6, - "paddingLeft": 32, - "paddingRight": 0, - "paddingTop": 6, - "verticalAlign": "center" - }, - "text": "20", - "type": "text", - "width": 48, - "x": 10, - "y": 548 - } - ], - "disabled": false, - "height": 62, - "id": 209272202, - "style": {}, - "width": 406, - "x": 2, - "y": 540 - } - }, - { - "parentId": 209272202, - "wireframe": { - "disabled": false, - "height": 19, - "id": 52129787, - "style": { - "color": "#000000", - "fontFamily": "sans-serif", - "fontSize": 14, - "horizontalAlign": "left", - "paddingBottom": 0, - "paddingLeft": 0, - "paddingRight": 0, - "paddingTop": 0, - "verticalAlign": "top" - }, - "text": "PostHog/posthog-ios", - "type": "text", - "width": 368, - "x": 66, - "y": 556 - } - }, - { - "parentId": 209272202, - "wireframe": { - "id": 52129787123, - "type": "text" - } - } - ], - "removes": [ - { - "id": 149659273, - "parentId": 47740111 - }, - { - "id": 151255663, - "parentId": 149659273 - } - ], - "source": 0 - }, - "timestamp": 1706104140861, - "type": 3 -} diff --git a/ee/frontend/mobile-replay/__snapshots__/parsing.test.ts.snap b/ee/frontend/mobile-replay/__snapshots__/parsing.test.ts.snap deleted file mode 100644 index c916dd21d5..0000000000 --- a/ee/frontend/mobile-replay/__snapshots__/parsing.test.ts.snap +++ /dev/null @@ -1,339 +0,0 @@ -// Jest Snapshot v1, https://goo.gl/fbAQLP - -exports[`snapshot parsing handles mobile data with no meta event 1`] = ` -[ - { - "data": { - "payload": { - "requests": [ - { - "duration": 28, - "entryType": "resource", - "initiatorType": "fetch", - "method": "GET", - "name": "https://1.bp.blogspot.com/-hkNkoCjc5UA/T4JTlCjhhfI/AAAAAAAAB98/XxQwZ-QPkI8/s1600/Free+Google+Wallpapers+3.jpg", - "responseStatus": 200, - "timestamp": 1725369200216, - "transferSize": 82375, - }, - ], - }, - "plugin": "rrweb/network@1", - }, - "seen": 8833798676917222, - "timestamp": 1725369200216, - "type": 6, - "windowId": "0191C63B-03FF-73B5-96BE-40BE2761621C", - }, - { - "data": { - "height": 852, - "href": "", - "width": 393, - }, - "timestamp": 1725607643113, - "type": 4, - "windowId": "0191C63B-03FF-73B5-96BE-40BE2761621C", - }, - { - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - 
"childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4324378400, - "height": 852, - "src": "data:image/jpeg;base64,/9j/4AAQSkZJR", - "style": "width: 393px;height: 852px;position: fixed;left: 0px;top: 0px;", - "width": 393, - }, - "childNodes": [], - "id": 4324378400, - "tagName": "img", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1725607643113, - "type": 2, - "windowId": "0191C63B-03FF-73B5-96BE-40BE2761621C", - }, -] -`; - -exports[`snapshot parsing handles normal mobile data 1`] = ` -[ - { - "data": { - "payload": { - "requests": [ - { - "duration": 28, - "entryType": "resource", - "initiatorType": "fetch", - "method": "GET", - "name": "https://1.bp.blogspot.com/-hkNkoCjc5UA/T4JTlCjhhfI/AAAAAAAAB98/XxQwZ-QPkI8/s1600/Free+Google+Wallpapers+3.jpg", - "responseStatus": 200, - "timestamp": 1725369200216, - "transferSize": 82375, - }, - ], - }, - "plugin": "rrweb/network@1", - }, - "seen": 8833798676917222, - "timestamp": 1725369200216, - "type": 6, - "windowId": "0191C63B-03FF-73B5-96BE-40BE2761621C", - }, - { - "data": { - "height": 852, - "href": "", - "width": 393, - }, - "timestamp": 1725607643113, - "type": 4, - "windowId": "0191C63B-03FF-73B5-96BE-40BE2761621C", - }, - { - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4324378400, - "height": 852, - "src": "data:image/jpeg;base64,/9j/4AAQSkZJR", - "style": "width: 393px;height: 
852px;position: fixed;left: 0px;top: 0px;", - "width": 393, - }, - "childNodes": [], - "id": 4324378400, - "tagName": "img", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1725607643113, - "type": 2, - "windowId": "0191C63B-03FF-73B5-96BE-40BE2761621C", - }, -] -`; diff --git a/ee/frontend/mobile-replay/__snapshots__/transform.test.ts.snap b/ee/frontend/mobile-replay/__snapshots__/transform.test.ts.snap deleted file mode 100644 index 7a2e0b8820..0000000000 --- a/ee/frontend/mobile-replay/__snapshots__/transform.test.ts.snap +++ /dev/null @@ -1,9005 +0,0 @@ -// Jest Snapshot v1, https://goo.gl/fbAQLP - -exports[`replay/transform transform can convert images 1`] = ` -[ - { - "data": { - "height": 600, - "href": "", - "width": 300, - }, - "timestamp": 1, - "type": 4, - }, - { - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 102, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 101, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12345, - "style": "color: #ffffff;width: 100px;height: 30px;position: fixed;left: 11px;top: 12px;overflow:hidden;white-space:normal;", - }, - "childNodes": [ - { - "id": 100, - "textContent": "β°Ÿι²δ©žγ‘›μ“―μž˜αŒ«δ΅€γ₯¦ι·λžιˆ…ζ―…β”ŒλΉ―ζΉŒα²—", - "type": 3, - }, - ], - "id": 12345, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 12345, - "height": 30, - "src": 
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII=", - "style": "width: 100px;height: 30px;position: fixed;left: 25px;top: 42px;", - "width": 100, - }, - "childNodes": [], - "id": 12345, - "tagName": "img", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, - }, -] -`; - -exports[`replay/transform transform can convert invalid text wireframe 1`] = ` -[ - { - "data": { - "height": 600, - "href": "", - "width": 300, - }, - "timestamp": 1, - "type": 4, - }, - { - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 102, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 101, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12345, - "style": "border-width: 4px;border-radius: 10px;border-color: #ee3ee4;border-style: solid;color: #ee3ee4;width: 100px;height: 30px;position: fixed;left: 11px;top: 
12px;overflow:hidden;white-space:normal;", - }, - "childNodes": [], - "id": 12345, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, - }, -] -`; - -exports[`replay/transform transform can convert navigation bar 1`] = ` -[ - { - "data": { - "height": 600, - "href": "", - "width": 300, - }, - "timestamp": 1, - "type": 4, - }, - { - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 107, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 106, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12345, - "style": "border-width: 4px;border-radius: 10px;border-color: #ee3ee4;border-style: solid;color: #ee3ee4;width: 100px;height: 30px;position: fixed;left: 11px;top: 12px;display:flex;flex-direction:row;align-items:center;justify-content:space-around;color:black;", - }, - "childNodes": [ - { - "attributes": {}, - "childNodes": [ - { - "id": 101, - "textContent": "β—€", - "type": 3, - }, - ], - "id": 100, - "tagName": "div", - "type": 2, - }, - { - "attributes": {}, - "childNodes": [ - { - "id": 103, - "textContent": "βšͺ", - "type": 3, - }, - ], - "id": 102, - "tagName": "div", - "type": 2, - }, - { - "attributes": {}, - "childNodes": [ - { - "id": 105, - "textContent": "⬜️", - "type": 3, - }, - ], - "id": 104, - "tagName": "div", - "type": 2, - }, - ], - "id": 12345, - "tagName": "div", - "type": 2, - }, - ], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, - }, -] -`; - -exports[`replay/transform transform can convert rect with text 1`] = ` -[ - { - "data": { - 
"height": 600, - "href": "", - "width": 300, - }, - "timestamp": 1, - "type": 4, - }, - { - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 102, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 101, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12345, - "style": "border-width: 4px;border-radius: 10px;border-color: #ee3ee4;border-style: solid;color: #ee3ee4;width: 100px;height: 30px;position: fixed;left: 11px;top: 12px;", - }, - "childNodes": [], - "id": 12345, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 12345, - "style": "width: 100px;height: 30px;position: fixed;left: 13px;top: 17px;overflow:hidden;white-space:normal;", - }, - "childNodes": [ - { - "id": 100, - "textContent": "i am in the box", - "type": 3, - }, - ], - "id": 12345, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, - }, -] -`; - -exports[`replay/transform transform can convert status bar 1`] = ` -[ - { - "data": { - "height": 600, - "href": "", - "width": 300, - }, - "timestamp": 1, - "type": 4, - }, - { - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 104, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 103, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-render-reason": "a fixed 
placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12, - "style": "color: black;width: 100px;height: 0px;position: fixed;left: 13px;top: 17px;display:flex;flex-direction:row;align-items:center;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 102, - "style": "width: 5px;", - }, - "childNodes": [], - "id": 102, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 100, - }, - "childNodes": [ - { - "id": 101, - "textContent": "12:00 AM", - "type": 3, - }, - ], - "id": 100, - "tagName": "div", - "type": 2, - }, - ], - "id": 12, - "tagName": "div", - "type": 2, - }, - ], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, - }, -] -`; - -exports[`replay/transform transform can ignore unknown wireframe types 1`] = ` -[ - { - "data": { - "height": 600, - "href": "", - "width": 300, - }, - "timestamp": 1, - "type": 4, - }, - { - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, - }, -] -`; - -exports[`replay/transform transform can process screenshot mutation 1`] = ` -[ - { - "data": { - "height": 600, - "href": "", - "width": 300, - }, - "timestamp": 1, - "type": 4, - }, - { - "data": { - "adds": [ - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 151700670, - "height": 914, - "src": "data:image/png;base64,mutated-image-content", - "style": "background-color: #F3EFF7;width: 411px;height: 914px;position: fixed;left: 0px;top: 0px;", - "width": 411, - }, - "childNodes": [], - "id": 151700670, 
- "tagName": "img", - "type": 2, - }, - "parentId": 5, - }, - ], - "attributes": [], - "removes": [ - { - "id": 151700670, - "parentId": 5, - }, - ], - "source": 0, - "texts": [], - }, - "seen": 3551987272322930, - "timestamp": 1714397336836, - "type": 3, - "windowId": "5173a13e-abac-4def-b227-2f81dc2808b6", - }, -] -`; - -exports[`replay/transform transform can process top level screenshot 1`] = ` -[ - { - "data": { - "height": 600, - "href": "", - "width": 300, - }, - "timestamp": 1, - "type": 4, - }, - { - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 151700670, - "height": 914, - "src": "data:image/png;base64,image-content", - "style": "background-color: #F3EFF7;width: 411px;height: 914px;position: fixed;left: 0px;top: 0px;", - "width": 411, - }, - "childNodes": [], - "id": 151700670, - "tagName": "img", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1714397321578, - "type": 2, - }, -] -`; - -exports[`replay/transform transform can process unknown types without error 1`] = ` -[ - { - "data": { - "height": 600, - "href": "", - "width": 300, - }, - "timestamp": 1, - "type": 4, - }, - { - "data": { - "height": 600, - "href": "included when present", - "width": 300, - }, - "timestamp": 1, - "type": 4, - }, - { - "type": 9999, - }, - { - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 102, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - 
", - "type": 3, - }, - ], - "id": 101, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12345, - "style": "background-color: #f3f4ef;background-image: url("data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTYiIGhlaWdodD0iMTYiIHZpZXdCb3g9IjAgMCAxNiAxNiIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KPHJlY3Qgd2lkdGg9IjE2IiBoZWlnaHQ9IjE2IiBmaWxsPSJibGFjayIvPgo8cGF0aCBkPSJNOCAwSDE2TDAgMTZWOEw4IDBaIiBmaWxsPSIjMkQyRDJEIi8+CjxwYXRoIGQ9Ik0xNiA4VjE2SDhMMTYgOFoiIGZpbGw9IiMyRDJEMkQiLz4KPC9zdmc+Cg==");background-size: auto;background-repeat: unset;color: #35373e;width: 100px;height: 30px;position: fixed;left: 25px;top: 42px;align-items: center;justify-content: center;display: flex;", - }, - "childNodes": [ - { - "id": 100, - "textContent": "image", - "type": 3, - }, - ], - "id": 12345, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, - }, -] -`; - -exports[`replay/transform transform can set background image to base64 png 1`] = ` -[ - { - "data": { - "height": 600, - "href": "", - "width": 300, - }, - "timestamp": 1, - "type": 4, - }, - { - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12345, - "style": "background-image: 
url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII=');background-size: contain;background-repeat: no-repeat;height: 30px;position: fixed;left: 0px;top: 0px;overflow:hidden;white-space:nowrap;", - }, - "childNodes": [], - "id": 12345, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 12346, - "style": "background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII=');background-size: contain;background-repeat: no-repeat;height: 30px;position: fixed;left: 0px;top: 0px;overflow:hidden;white-space:nowrap;", - }, - "childNodes": [], - "id": 12346, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 12346, - "style": "background-image: 
url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII=');background-size: cover;background-repeat: no-repeat;height: 30px;position: fixed;left: 0px;top: 0px;overflow:hidden;white-space:nowrap;", - }, - "childNodes": [], - "id": 12346, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 12346, - "style": "height: 30px;position: fixed;left: 0px;top: 0px;overflow:hidden;white-space:nowrap;", - }, - "childNodes": [], - "id": 12346, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, - }, -] -`; - -exports[`replay/transform transform can short-circuit non-mobile full snapshot 1`] = ` -[ - { - "data": { - "height": 600, - "href": "https://my-awesome.site", - "width": 300, - }, - "timestamp": 1, - "type": 4, - }, - { - "data": { - "node": { - "the": "payload", - }, - }, - "timestamp": 1, - "type": 2, - }, -] -`; - -exports[`replay/transform transform child wireframes are processed 1`] = ` -[ - { - "data": { - "height": 600, - "href": "", - "width": 300, - }, - "timestamp": 1, - "type": 4, - }, - { - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 104, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - 
padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 103, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 123456789, - "style": "position: fixed;left: 0px;top: 0px;overflow:hidden;white-space:nowrap;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 98765, - "style": "position: fixed;left: 0px;top: 0px;overflow:hidden;white-space:nowrap;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12345, - "style": "background-color: #000000;border-width: 4px;border-radius: 10px;border-color: #000ddd;border-style: solid;color: #ffffff;width: 100px;height: 30px;position: fixed;left: 11px;top: 12px;overflow:hidden;white-space:normal;", - }, - "childNodes": [ - { - "id": 100, - "textContent": "first nested", - "type": 3, - }, - ], - "id": 12345, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 12345, - "style": "background-color: #000000;border-width: 4px;border-radius: 10px;border-color: #000ddd;border-style: solid;color: #ffffff;width: 100px;height: 30px;position: fixed;left: 11px;top: 12px;overflow:hidden;white-space:normal;", - }, - "childNodes": [ - { - "id": 101, - "textContent": "second nested", - "type": 3, - }, - ], - "id": 12345, - "tagName": "div", - "type": 2, - }, - ], - "id": 98765, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 12345, - "style": "background-color: #000000;border-width: 4px;border-radius: 10px;border-color: #000ddd;border-style: solid;color: #ffffff;width: 100px;height: 30px;position: fixed;left: 11px;top: 12px;overflow:hidden;white-space:normal;", - }, - "childNodes": [ - { - "id": 102, - "textContent": "third (different level) nested", - "type": 3, - }, - ], - "id": 12345, - "tagName": "div", - "type": 2, - }, - ], - "id": 123456789, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, - }, -] -`; - -exports[`replay/transform transform incremental mutations de-duplicate the tree 1`] = ` -{ - "data": { - "adds": [ - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 142908405, - "style": "background-image: 
url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAABDgAAAC5CAYAAADNs4/hAAAAAXNSR0IArs4c6QAAAARzQklUCAgICHwIZIgAAAWeSURBVHic7dyxqh1lGIbR77cIBuzTWKUShDSChVh7ITaWFoH0Emsr78D7sBVCBFOZQMo0XoAEEshvZeeZ8cR9PDywVjsfm7d+mNkzAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADA7Vn/5mjv/fHMfDszX83MgxtdBAAAADDzbGZ+npkf1lqvzo5PA8fe++uZ+XFm7v73bQAAAADX8npmvllr/XR0dBg49t73Z+b3mblzwWEAAAAA1/FmZj5da7286uCDkx94POIGAAAAcLvuzMx3RwdngeOzy20BAAAAeG+HjeLKT1T23h/OzJ9zHkEAAAAAbtq7mflorfX6nx6e/QfHvpFJAAAAANe01rqyY3g7AwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIOwscL/6XFQAAAADHDhvFWeD47YJDAAAAAN7XYaNYRw/33p/PzC/jUxYAAADg9rydmS/WWk+vOjgMF2utJzPz+NKrAAAAAK7h0VHcmDl5g+Nve+8vZ+b7mflkZu5dYBgAAADAkT9m5vnMPFxr/XrbYwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgCN/AW0xMqHnNQceAAAAAElFTkSuQmCC');background-size: contain;background-repeat: no-repeat;width: 411px;height: 70px;position: fixed;left: 0px;top: 536px;overflow:hidden;white-space:nowrap;", - }, - "childNodes": [], - "id": 142908405, - "tagName": "div", - "type": 2, - }, - "parentId": 183891344, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 209272202, - "style": "width: 406px;height: 62px;position: fixed;left: 2px;top: 540px;overflow:hidden;white-space:nowrap;", - }, - "childNodes": [], - "id": 209272202, - "tagName": "div", - "type": 2, - }, - "parentId": 142908405, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 99571736, - "style": "color: #000000;width: 150px;height: 19px;position: fixed;left: 10px;top: 584px;align-items: flex-start;justify-content: flex-start;display: flex;padding-left: 0px;padding-right: 0px;padding-top: 0px;padding-bottom: 0px;font-size: 14px;font-family: sans-serif;overflow:hidden;white-space:normal;", - }, - "childNodes": [], - "id": 99571736, - "tagName": "div", - "type": 2, - }, - "parentId": 209272202, - }, - { - "nextId": null, - "node": { - "id": 109, - "textContent": "PostHog iOS integration", - "type": 3, - }, - "parentId": 99571736, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 240124529, - "style": "color: #000000;width: 48px;height: 32px;position: fixed;left: 10px;top: 548px;align-items: center;justify-content: center;display: flex;padding-left: 32px;padding-right: 
0px;padding-top: 6px;padding-bottom: 6px;font-size: 14px;font-family: sans-serif;overflow:hidden;white-space:normal;", - }, - "childNodes": [], - "id": 240124529, - "tagName": "div", - "type": 2, - }, - "parentId": 209272202, - }, - { - "nextId": null, - "node": { - "id": 110, - "textContent": "20", - "type": 3, - }, - "parentId": 240124529, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 52129787, - "style": "color: #000000;width: 368px;height: 19px;position: fixed;left: 66px;top: 556px;align-items: flex-start;justify-content: flex-start;display: flex;padding-left: 0px;padding-right: 0px;padding-top: 0px;padding-bottom: 0px;font-size: 14px;font-family: sans-serif;overflow:hidden;white-space:normal;", - }, - "childNodes": [], - "id": 52129787, - "tagName": "div", - "type": 2, - }, - "parentId": 209272202, - }, - { - "nextId": null, - "node": { - "id": 111, - "textContent": "PostHog/posthog-ios", - "type": 3, - }, - "parentId": 52129787, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 52129787123, - "style": "position: fixed;left: 0px;top: 0px;overflow:hidden;white-space:normal;", - }, - "childNodes": [], - "id": 52129787123, - "tagName": "div", - "type": 2, - }, - "parentId": 209272202, - }, - ], - "attributes": [], - "removes": [ - { - "id": 149659273, - "parentId": 47740111, - }, - { - "id": 151255663, - "parentId": 149659273, - }, - ], - "source": 0, - "texts": [], - }, - "default": { - "data": { - "adds": [ - { - "parentId": 183891344, - "wireframe": { - "childWireframes": [ - { - "childWireframes": [ - { - "disabled": false, - "height": 19, - "id": 52129787, - "style": { - "color": "#000000", - "fontFamily": "sans-serif", - "fontSize": 14, - "horizontalAlign": "left", - "paddingBottom": 0, - "paddingLeft": 0, - "paddingRight": 0, - "paddingTop": 0, - "verticalAlign": "top", - }, - "text": "PostHog/posthog-ios", - "type": "text", - "width": 368, - "x": 66, - "y": 556, - }, - { - "disabled": false, - "height": 19, - "id": 99571736, - "style": { - "color": "#000000", - "fontFamily": "sans-serif", - "fontSize": 14, - "horizontalAlign": "left", - "paddingBottom": 0, - "paddingLeft": 0, - "paddingRight": 0, - "paddingTop": 0, - "verticalAlign": "top", - }, - "text": "PostHog iOS integration", - "type": "text", - "width": 150, - "x": 10, - "y": 584, - }, - { - "disabled": false, - "height": 32, - "id": 240124529, - "style": { - "color": "#000000", - "fontFamily": "sans-serif", - "fontSize": 14, - "horizontalAlign": "center", - "paddingBottom": 6, - "paddingLeft": 32, - "paddingRight": 0, - "paddingTop": 6, - "verticalAlign": "center", - }, - "text": "20", - "type": "text", - "width": 48, - "x": 10, - "y": 548, - }, - ], - "disabled": false, - "height": 62, - "id": 209272202, - "style": {}, - "width": 406, - "x": 2, - "y": 540, - }, - ], - "disabled": false, - "height": 70, - "id": 142908405, - "style": { - "backgroundImage": "iVBORw0KGgoAAAANSUhEUgAABDgAAAC5CAYAAADNs4/hAAAAAXNSR0IArs4c6QAAAARzQklUCAgI -CHwIZIgAAAWeSURBVHic7dyxqh1lGIbR77cIBuzTWKUShDSChVh7ITaWFoH0Emsr78D7sBVCBFOZ -QMo0XoAEEshvZeeZ8cR9PDywVjsfm7d+mNkzAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAADA7Vn/5mjv/fHMfDszX83MgxtdBAAAADDzbGZ+npkf1lqvzo5PA8fe++uZ+XFm -7v73bQAAAADX8npmvllr/XR0dBg49t73Z+b3mblzwWEAAAAA1/FmZj5da7286uCDkx94POIGAAAA -cLvuzMx3RwdngeOzy20BAAAAeG+HjeLKT1T23h/OzJ9zHkEAAAAAbtq7mflorfX6nx6e/QfHvpFJ -AAAAANe01rqyY3g7AwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgT -OAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACA 
-PIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAA -AMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgA -AACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyB -AwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADI -EzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAA -gDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMA -AADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4 -AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8 -gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAA -yBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAA -AIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIED -AAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgT -OAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACA -PIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAA -AMgTOAAAAIA8gQMAAADIEzgAAACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIEzgA -AACAPIEDAAAAyBM4AAAAgDyBAwAAAMgTOAAAAIA8gQMAAADIOwscL/6XFQAAAADHDhvFWeD47YJD -AAAAAN7XYaNYRw/33p/PzC/jUxYAAADg9rydmS/WWk+vOjgMF2utJzPz+NKrAAAAAK7h0VHcmDl5 -g+Nve+8vZ+b7mflkZu5dYBgAAADAkT9m5vnMPFxr/XrbYwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -AAAAAAAAAAAAAAAAAAAAAAAAgCN/AW0xMqHnNQceAAAAAElFTkSuQmCC -", - }, - "width": 411, - "x": 0, - "y": 536, - }, - }, - { - "parentId": 142908405, - "wireframe": { - "childWireframes": [ - { - "disabled": false, - "height": 19, - "id": 52129787, - "style": { - "color": "#000000", - "fontFamily": "sans-serif", - "fontSize": 14, - "horizontalAlign": "left", - "paddingBottom": 0, - "paddingLeft": 0, - "paddingRight": 0, - "paddingTop": 0, - "verticalAlign": "top", - }, - "text": "PostHog/posthog-ios", - "type": "text", - "width": 368, - "x": 66, - "y": 556, - }, - { - "disabled": false, - "height": 19, - "id": 99571736, - "style": { - "color": "#000000", - "fontFamily": "sans-serif", - "fontSize": 14, - "horizontalAlign": "left", - "paddingBottom": 0, - "paddingLeft": 0, - "paddingRight": 0, - "paddingTop": 0, - "verticalAlign": "top", - }, - "text": "PostHog iOS integration", - "type": "text", - "width": 150, - "x": 10, - "y": 584, - }, - { - "disabled": false, - "height": 32, - "id": 240124529, - "style": { - "color": "#000000", - "fontFamily": "sans-serif", - "fontSize": 14, - "horizontalAlign": "center", - "paddingBottom": 6, - "paddingLeft": 32, - "paddingRight": 0, - "paddingTop": 6, - "verticalAlign": "center", - }, - "text": "20", - "type": "text", - "width": 48, - "x": 10, - "y": 548, - }, - ], - "disabled": false, - "height": 62, - "id": 209272202, - "style": {}, - "width": 406, - "x": 2, - "y": 540, - }, - }, - { - "parentId": 209272202, - "wireframe": { - "disabled": false, - "height": 19, - "id": 52129787, - "style": { - "color": "#000000", - "fontFamily": "sans-serif", - "fontSize": 14, - "horizontalAlign": "left", - "paddingBottom": 0, - "paddingLeft": 0, - "paddingRight": 0, - "paddingTop": 0, - "verticalAlign": "top", - }, - "text": "PostHog/posthog-ios", - "type": "text", - "width": 368, - "x": 66, - "y": 556, - }, - }, - { - "parentId": 209272202, - "wireframe": { - "id": 52129787123, - "type": "text", - }, - }, - ], - "removes": [ - { - "id": 151255663, - "parentId": 149659273, - }, - { - "id": 149659273, - "parentId": 47740111, - }, - ], - "source": 0, - }, - "timestamp": 1706104140861, - "type": 
3, - }, - "timestamp": 1706104140861, - "type": 3, -} -`; - -exports[`replay/transform transform inputs buttons with nested elements 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 102, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 101, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12359, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - "type": "button", - }, - "childNodes": [], - "id": 12359, - "tagName": "button", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 12361, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - "type": "button", - }, - "childNodes": [ - { - "id": 100, - "textContent": "click me", - "type": 3, - }, - ], - "id": 12361, - "tagName": "button", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs closed keyboard custom event 1`] = ` -{ - "data": { - "adds": [], - "attributes": [], - "removes": [ - { - "id": 10, - "parentId": 9, - }, - ], - "source": 0, - "texts": [], - }, - "timestamp": 1, - "type": 3, -} -`; - -exports[`replay/transform transform inputs input - $inputType - hello 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": 
"height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - button - click me 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 102, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 101, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12358, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - "type": "button", - }, - "childNodes": [ - { - "id": 100, - "textContent": "click me", - "type": 3, - }, - ], - "id": 12358, - "tagName": "button", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - checkbox - $value 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 103, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 
102, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 101, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - }, - "childNodes": [ - { - "attributes": { - "checked": true, - "data-rrweb-id": 12357, - "style": null, - "type": "checkbox", - }, - "childNodes": [], - "id": 12357, - "tagName": "input", - "type": 2, - }, - { - "id": 100, - "textContent": "first", - "type": 3, - }, - ], - "id": 101, - "tagName": "label", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - checkbox - $value 2`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 103, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 102, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 101, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12357, - "style": null, - "type": "checkbox", - }, - "childNodes": [], - "id": 12357, - "tagName": "input", - "type": 2, - }, - { - "id": 100, - "textContent": "second", - "type": 3, - }, - ], - "id": 101, - "tagName": "label", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - checkbox - $value 3`] = ` -{ - "data": { - 
"initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 103, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 102, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 101, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - }, - "childNodes": [ - { - "attributes": { - "checked": true, - "data-rrweb-id": 12357, - "disabled": true, - "style": null, - "type": "checkbox", - }, - "childNodes": [], - "id": 12357, - "tagName": "input", - "type": 2, - }, - { - "id": 100, - "textContent": "third", - "type": 3, - }, - ], - "id": 101, - "tagName": "label", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - checkbox - $value 4`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "checked": true, - "data-rrweb-id": 12357, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - "type": "checkbox", - }, - "childNodes": [], - "id": 12357, - "tagName": "input", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": 
[], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - email - $value 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12349, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - "type": "email", - "value": "", - }, - "childNodes": [], - "id": 12349, - "tagName": "input", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - number - $value 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12350, - "style": "width: 
100px;height: 30px;position: fixed;left: 0px;top: 0px;", - "type": "number", - "value": "", - }, - "childNodes": [], - "id": 12350, - "tagName": "input", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - password - $value 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12348, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - "type": "password", - "value": "", - }, - "childNodes": [], - "id": 12348, - "tagName": "input", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - progress - $value 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 104, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { 
- border-style: none; - } - ", - "type": 3, - }, - ], - "id": 103, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12365, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 102, - "style": "background-color: #f3f4ef;width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;border: 4px solid #35373e;border-radius: 50%;border-top: 4px solid #fff;animation: spin 2s linear infinite;", - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": "@keyframes spin { 0% { transform: rotate(0deg); } 100% { transform: rotate(360deg); } }", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 102, - "tagName": "div", - "type": 2, - }, - ], - "id": 12365, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - progress - $value 2`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12365, - "max": null, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - "type": null, - "value": null, - }, - "childNodes": [], - "id": 12365, - "tagName": "progress", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - 
}, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - progress - 0.75 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12365, - "max": null, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - "type": null, - "value": 0.75, - }, - "childNodes": [], - "id": 12365, - "tagName": "progress", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - progress - 0.75 2`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12365, - "max": 2.5, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - "type": null, - "value": 0.75, - }, - "childNodes": [], - "id": 12365, - "tagName": "progress", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, 
- }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - search - $value 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12351, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - "type": "search", - "value": "", - }, - "childNodes": [], - "id": 12351, - "tagName": "input", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - select - hello 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 105, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 104, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12365, - 
"style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - "type": "select", - "value": "hello", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 100, - "selected": true, - }, - "childNodes": [ - { - "id": 101, - "textContent": "hello", - "type": 3, - }, - ], - "id": 100, - "tagName": "option", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 102, - }, - "childNodes": [ - { - "id": 103, - "textContent": "world", - "type": 3, - }, - ], - "id": 102, - "tagName": "option", - "type": 2, - }, - ], - "id": 12365, - "tagName": "select", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - tel - $value 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12352, - "disabled": true, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - "type": "tel", - "value": "", - }, - "childNodes": [], - "id": 12352, - "tagName": "input", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - text - $value 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - 
"childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12347, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - "type": "text", - "value": "", - }, - "childNodes": [], - "id": 12347, - "tagName": "input", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - text - hello 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12346, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - "type": "text", - "value": "hello", - }, - "childNodes": [], - "id": 12346, - "tagName": "input", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - 
-exports[`replay/transform transform inputs input - textArea - $value 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12364, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - "type": "textArea", - "value": "", - }, - "childNodes": [], - "id": 12364, - "tagName": "input", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - textArea - hello 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12363, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - "type": "textArea", - "value": "hello", - }, - "childNodes": [], - "id": 12363, - "tagName": "input", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - 
"tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - toggle - $value 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 106, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 105, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 104, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - }, - "childNodes": [ - { - "id": 103, - "textContent": "first", - "type": 3, - }, - { - "attributes": { - "data-rrweb-id": 12357, - "style": "height:100%;flex:1;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 100, - "style": "position:relative;width:100%;height:100%;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 101, - "data-toggle-part": "slider", - "style": "position:absolute;top:33%;left:5%;display:inline-block;width:75%;height:33%;opacity: 0.2;border-radius:7.5%;background-color:#1d4aff;", - }, - "childNodes": [], - "id": 101, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 102, - "data-toggle-part": "handle", - "style": "position:absolute;top:1.5%;right:5%;display:flex;align-items:center;justify-content:center;width:40%;height:75%;cursor:inherit;border-radius:50%;background-color:#1d4aff;border:2px solid #1d4aff;", - }, - "childNodes": [], - "id": 102, - "tagName": "div", - "type": 2, - }, - ], - "id": 100, - "tagName": "div", - "type": 2, - }, - ], - "id": 12357, - "tagName": "div", - "type": 2, - }, - ], - "id": 104, - "tagName": "label", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - toggle - $value 2`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - 
"systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 106, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 105, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 104, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - }, - "childNodes": [ - { - "id": 103, - "textContent": "second", - "type": 3, - }, - { - "attributes": { - "data-rrweb-id": 12357, - "style": "height:100%;flex:1;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 100, - "style": "position:relative;width:100%;height:100%;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 101, - "data-toggle-part": "slider", - "style": "position:absolute;top:33%;left:5%;display:inline-block;width:75%;height:33%;opacity: 0.2;border-radius:7.5%;background-color:#f3f4ef;", - }, - "childNodes": [], - "id": 101, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 102, - "data-toggle-part": "handle", - "style": "position:absolute;top:1.5%;left:5%;display:flex;align-items:center;justify-content:center;width:40%;height:75%;cursor:inherit;border-radius:50%;background-color:#f3f4ef;border:2px solid #f3f4ef;", - }, - "childNodes": [], - "id": 102, - "tagName": "div", - "type": 2, - }, - ], - "id": 100, - "tagName": "div", - "type": 2, - }, - ], - "id": 12357, - "tagName": "div", - "type": 2, - }, - ], - "id": 104, - "tagName": "label", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - toggle - $value 3`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 106, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: 
none; - } - ", - "type": 3, - }, - ], - "id": 105, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 104, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - }, - "childNodes": [ - { - "id": 103, - "textContent": "third", - "type": 3, - }, - { - "attributes": { - "data-rrweb-id": 12357, - "style": "height:100%;flex:1;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 100, - "style": "position:relative;width:100%;height:100%;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 101, - "data-toggle-part": "slider", - "style": "position:absolute;top:33%;left:5%;display:inline-block;width:75%;height:33%;opacity: 0.2;border-radius:7.5%;background-color:#1d4aff;", - }, - "childNodes": [], - "id": 101, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 102, - "data-toggle-part": "handle", - "style": "position:absolute;top:1.5%;right:5%;display:flex;align-items:center;justify-content:center;width:40%;height:75%;cursor:inherit;border-radius:50%;background-color:#1d4aff;border:2px solid #1d4aff;", - }, - "childNodes": [], - "id": 102, - "tagName": "div", - "type": 2, - }, - ], - "id": 100, - "tagName": "div", - "type": 2, - }, - ], - "id": 12357, - "tagName": "div", - "type": 2, - }, - ], - "id": 104, - "tagName": "label", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - toggle - $value 4`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 104, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 103, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12357, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 100, - "style": "position:relative;width:100%;height:100%;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 101, - 
"data-toggle-part": "slider", - "style": "position:absolute;top:33%;left:5%;display:inline-block;width:75%;height:33%;opacity: 0.2;border-radius:7.5%;background-color:#1d4aff;", - }, - "childNodes": [], - "id": 101, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 102, - "data-toggle-part": "handle", - "style": "position:absolute;top:1.5%;right:5%;display:flex;align-items:center;justify-content:center;width:40%;height:75%;cursor:inherit;border-radius:50%;background-color:#1d4aff;border:2px solid #1d4aff;", - }, - "childNodes": [], - "id": 102, - "tagName": "div", - "type": 2, - }, - ], - "id": 100, - "tagName": "div", - "type": 2, - }, - ], - "id": 12357, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input - url - https://example.io 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12352, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - "type": "url", - "value": "https://example.io", - }, - "childNodes": [], - "id": 12352, - "tagName": "input", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs input gets 0 padding by default but can be overridden 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { 
- "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12359, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - "type": "text", - "value": "", - }, - "childNodes": [], - "id": 12359, - "tagName": "input", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 12361, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;padding-left: 16px;padding-right: 16px;", - "type": "text", - "value": "", - }, - "childNodes": [], - "id": 12361, - "tagName": "input", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs isolated add mutation 1`] = ` -{ - "data": { - "adds": [ - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 12365, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - }, - "childNodes": [], - "id": 12365, - "tagName": "div", - "type": 2, - }, - "parentId": 54321, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 201, - "style": "position: relative;display: flex;flex-direction: row;padding: 2px 4px;", - }, - "childNodes": [], - "id": 201, - "tagName": "div", - "type": 2, - }, - "parentId": 12365, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 153, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 153, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 201, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 154, - }, - "childNodes": [], - "id": 154, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 153, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,17.27L18.18,21L16.54,13.97L22,9.24L14.81,8.62L12,2L9.19,8.62L2,9.24L7.45,13.97L5.82,21L12,17.27Z", - "data-rrweb-id": 155, - }, - "childNodes": [], - "id": 155, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 153, - }, - { - "nextId": null, - "node": { - "attributes": { - 
"data-rrweb-id": 157, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 157, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 201, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 158, - }, - "childNodes": [], - "id": 158, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 157, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,17.27L18.18,21L16.54,13.97L22,9.24L14.81,8.62L12,2L9.19,8.62L2,9.24L7.45,13.97L5.82,21L12,17.27Z", - "data-rrweb-id": 159, - }, - "childNodes": [], - "id": 159, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 157, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 161, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 161, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 201, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 162, - }, - "childNodes": [], - "id": 162, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 161, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,17.27L18.18,21L16.54,13.97L22,9.24L14.81,8.62L12,2L9.19,8.62L2,9.24L7.45,13.97L5.82,21L12,17.27Z", - "data-rrweb-id": 163, - }, - "childNodes": [], - "id": 163, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 161, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 165, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 165, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 201, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 166, - }, - "childNodes": [], - "id": 166, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 165, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,17.27L18.18,21L16.54,13.97L22,9.24L14.81,8.62L12,2L9.19,8.62L2,9.24L7.45,13.97L5.82,21L12,17.27Z", - "data-rrweb-id": 167, - }, - "childNodes": [], - "id": 167, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 165, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 169, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 169, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 201, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 170, - }, - "childNodes": [], - "id": 170, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 169, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,17.27L18.18,21L16.54,13.97L22,9.24L14.81,8.62L12,2L9.19,8.62L2,9.24L7.45,13.97L5.82,21L12,17.27Z", - "data-rrweb-id": 171, - }, - "childNodes": [], - "id": 171, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 169, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 173, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 173, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 
201, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 174, - }, - "childNodes": [], - "id": 174, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 173, - }, - { - "nextId": null, - "node": { - "id": 176, - "textContent": "filled star", - "type": 3, - }, - "parentId": 174, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,17.27L18.18,21L16.54,13.97L22,9.24L14.81,8.62L12,2L9.19,8.62L2,9.24L7.45,13.97L5.82,21L12,17.27Z", - "data-rrweb-id": 175, - }, - "childNodes": [], - "id": 175, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 173, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 177, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 177, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 201, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 178, - }, - "childNodes": [], - "id": 178, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 177, - }, - { - "nextId": null, - "node": { - "id": 180, - "textContent": "half-filled star", - "type": 3, - }, - "parentId": 178, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,15.4V6.1L13.71,10.13L18.09,10.5L14.77,13.39L15.76,17.67M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z", - "data-rrweb-id": 179, - }, - "childNodes": [], - "id": 179, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 177, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 181, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 181, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 201, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 182, - }, - "childNodes": [], - "id": 182, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 181, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,15.39L8.24,17.66L9.23,13.38L5.91,10.5L10.29,10.13L12,6.09L13.71,10.13L18.09,10.5L14.77,13.38L15.76,17.66M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z", - "data-rrweb-id": 183, - }, - "childNodes": [], - "id": 183, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 181, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 185, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 185, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 201, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 186, - }, - "childNodes": [], - "id": 186, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 185, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,15.39L8.24,17.66L9.23,13.38L5.91,10.5L10.29,10.13L12,6.09L13.71,10.13L18.09,10.5L14.77,13.38L15.76,17.66M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z", - "data-rrweb-id": 187, - }, - "childNodes": [], - "id": 187, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 185, - }, - { - "nextId": null, - "node": { - "attributes": { - 
"data-rrweb-id": 189, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 189, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 201, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 190, - }, - "childNodes": [], - "id": 190, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 189, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,15.39L8.24,17.66L9.23,13.38L5.91,10.5L10.29,10.13L12,6.09L13.71,10.13L18.09,10.5L14.77,13.38L15.76,17.66M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z", - "data-rrweb-id": 191, - }, - "childNodes": [], - "id": 191, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 189, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 193, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 193, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 201, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 194, - }, - "childNodes": [], - "id": 194, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 193, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,15.39L8.24,17.66L9.23,13.38L5.91,10.5L10.29,10.13L12,6.09L13.71,10.13L18.09,10.5L14.77,13.38L15.76,17.66M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z", - "data-rrweb-id": 195, - }, - "childNodes": [], - "id": 195, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 193, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 197, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 197, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 201, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 198, - }, - "childNodes": [], - "id": 198, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 197, - }, - { - "nextId": null, - "node": { - "id": 200, - "textContent": "empty star", - "type": 3, - }, - "parentId": 198, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,15.39L8.24,17.66L9.23,13.38L5.91,10.5L10.29,10.13L12,6.09L13.71,10.13L18.09,10.5L14.77,13.38L15.76,17.66M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z", - "data-rrweb-id": 199, - }, - "childNodes": [], - "id": 199, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 197, - }, - ], - "attributes": [], - "removes": [], - "source": 0, - "texts": [], - }, - "timestamp": 1, - "type": 3, -} -`; - -exports[`replay/transform transform inputs isolated remove mutation 1`] = ` -{ - "data": { - "removes": [ - { - "id": 12345, - "parentId": 54321, - }, - ], - "source": 0, - }, - "timestamp": 1, - "type": 3, -} -`; - -exports[`replay/transform transform inputs isolated update mutation 1`] = ` -{ - "data": { - "adds": [ - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 12365, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - }, - "childNodes": [], - "id": 12365, - "tagName": "div", - "type": 2, - }, - "parentId": 54321, - }, - 
{ - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 250, - "style": "position: relative;display: flex;flex-direction: row;padding: 2px 4px;", - }, - "childNodes": [], - "id": 250, - "tagName": "div", - "type": 2, - }, - "parentId": 12365, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 202, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 202, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 250, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 203, - }, - "childNodes": [], - "id": 203, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 202, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,17.27L18.18,21L16.54,13.97L22,9.24L14.81,8.62L12,2L9.19,8.62L2,9.24L7.45,13.97L5.82,21L12,17.27Z", - "data-rrweb-id": 204, - }, - "childNodes": [], - "id": 204, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 202, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 206, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 206, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 250, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 207, - }, - "childNodes": [], - "id": 207, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 206, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,17.27L18.18,21L16.54,13.97L22,9.24L14.81,8.62L12,2L9.19,8.62L2,9.24L7.45,13.97L5.82,21L12,17.27Z", - "data-rrweb-id": 208, - }, - "childNodes": [], - "id": 208, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 206, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 210, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 210, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 250, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 211, - }, - "childNodes": [], - "id": 211, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 210, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,17.27L18.18,21L16.54,13.97L22,9.24L14.81,8.62L12,2L9.19,8.62L2,9.24L7.45,13.97L5.82,21L12,17.27Z", - "data-rrweb-id": 212, - }, - "childNodes": [], - "id": 212, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 210, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 214, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 214, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 250, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 215, - }, - "childNodes": [], - "id": 215, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 214, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,17.27L18.18,21L16.54,13.97L22,9.24L14.81,8.62L12,2L9.19,8.62L2,9.24L7.45,13.97L5.82,21L12,17.27Z", - "data-rrweb-id": 216, - }, - "childNodes": [], - "id": 216, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 214, - }, - 
{ - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 218, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 218, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 250, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 219, - }, - "childNodes": [], - "id": 219, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 218, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,17.27L18.18,21L16.54,13.97L22,9.24L14.81,8.62L12,2L9.19,8.62L2,9.24L7.45,13.97L5.82,21L12,17.27Z", - "data-rrweb-id": 220, - }, - "childNodes": [], - "id": 220, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 218, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 222, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 222, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 250, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 223, - }, - "childNodes": [], - "id": 223, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 222, - }, - { - "nextId": null, - "node": { - "id": 225, - "textContent": "filled star", - "type": 3, - }, - "parentId": 223, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,17.27L18.18,21L16.54,13.97L22,9.24L14.81,8.62L12,2L9.19,8.62L2,9.24L7.45,13.97L5.82,21L12,17.27Z", - "data-rrweb-id": 224, - }, - "childNodes": [], - "id": 224, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 222, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 226, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 226, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 250, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 227, - }, - "childNodes": [], - "id": 227, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 226, - }, - { - "nextId": null, - "node": { - "id": 229, - "textContent": "half-filled star", - "type": 3, - }, - "parentId": 227, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,15.4V6.1L13.71,10.13L18.09,10.5L14.77,13.39L15.76,17.67M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z", - "data-rrweb-id": 228, - }, - "childNodes": [], - "id": 228, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 226, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 230, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 230, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 250, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 231, - }, - "childNodes": [], - "id": 231, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 230, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": 
"M12,15.39L8.24,17.66L9.23,13.38L5.91,10.5L10.29,10.13L12,6.09L13.71,10.13L18.09,10.5L14.77,13.38L15.76,17.66M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z", - "data-rrweb-id": 232, - }, - "childNodes": [], - "id": 232, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 230, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 234, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 234, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 250, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 235, - }, - "childNodes": [], - "id": 235, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 234, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,15.39L8.24,17.66L9.23,13.38L5.91,10.5L10.29,10.13L12,6.09L13.71,10.13L18.09,10.5L14.77,13.38L15.76,17.66M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z", - "data-rrweb-id": 236, - }, - "childNodes": [], - "id": 236, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 234, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 238, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 238, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 250, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 239, - }, - "childNodes": [], - "id": 239, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 238, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,15.39L8.24,17.66L9.23,13.38L5.91,10.5L10.29,10.13L12,6.09L13.71,10.13L18.09,10.5L14.77,13.38L15.76,17.66M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z", - "data-rrweb-id": 240, - }, - "childNodes": [], - "id": 240, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 238, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 242, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 242, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 250, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 243, - }, - "childNodes": [], - "id": 243, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 242, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,15.39L8.24,17.66L9.23,13.38L5.91,10.5L10.29,10.13L12,6.09L13.71,10.13L18.09,10.5L14.77,13.38L15.76,17.66M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z", - "data-rrweb-id": 244, - }, - "childNodes": [], - "id": 244, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 242, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 246, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [], - "id": 246, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - "parentId": 250, - }, - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 247, - }, - 
"childNodes": [], - "id": 247, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - "parentId": 246, - }, - { - "nextId": null, - "node": { - "id": 249, - "textContent": "empty star", - "type": 3, - }, - "parentId": 247, - }, - { - "nextId": null, - "node": { - "attributes": { - "d": "M12,15.39L8.24,17.66L9.23,13.38L5.91,10.5L10.29,10.13L12,6.09L13.71,10.13L18.09,10.5L14.77,13.38L15.76,17.66M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z", - "data-rrweb-id": 248, - }, - "childNodes": [], - "id": 248, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - "parentId": 246, - }, - ], - "attributes": [], - "removes": [ - { - "id": 12365, - "parentId": 54321, - }, - ], - "source": 0, - "texts": [], - }, - "timestamp": 1, - "type": 3, -} -`; - -exports[`replay/transform transform inputs open keyboard custom event 1`] = ` -{ - "data": { - "adds": [ - { - "nextId": null, - "node": { - "attributes": { - "data-rrweb-id": 10, - "style": "background-color: #f3f4ef;background-image: url("data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTYiIGhlaWdodD0iMTYiIHZpZXdCb3g9IjAgMCAxNiAxNiIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KPHJlY3Qgd2lkdGg9IjE2IiBoZWlnaHQ9IjE2IiBmaWxsPSJibGFjayIvPgo8cGF0aCBkPSJNOCAwSDE2TDAgMTZWOEw4IDBaIiBmaWxsPSIjMkQyRDJEIi8+CjxwYXRoIGQ9Ik0xNiA4VjE2SDhMMTYgOFoiIGZpbGw9IiMyRDJEMkQiLz4KPC9zdmc+Cg==");background-size: auto;background-repeat: unset;color: #35373e;width: 100vw;height: 150px;bottom: 0;position: fixed;align-items: center;justify-content: center;display: flex;", - }, - "childNodes": [ - { - "id": 151, - "textContent": "keyboard", - "type": 3, - }, - ], - "id": 10, - "tagName": "div", - "type": 2, - }, - "parentId": 9, - }, - { - "nextId": null, - "node": { - "id": 152, - "textContent": "keyboard", - "type": 3, - }, - "parentId": 10, - }, - ], - "attributes": [], - "removes": [], - "source": 0, - "texts": [], - }, - "timestamp": 1, - "type": 3, -} -`; - -exports[`replay/transform transform inputs placeholder - $inputType - $value 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 102, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 101, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12365, - "style": "background-color: #f3f4ef;background-image: url("data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTYiIGhlaWdodD0iMTYiIHZpZXdCb3g9IjAgMCAxNiAxNiIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KPHJlY3Qgd2lkdGg9IjE2IiBoZWlnaHQ9IjE2IiBmaWxsPSJibGFjayIvPgo8cGF0aCBkPSJNOCAwSDE2TDAgMTZWOEw4IDBaIiBmaWxsPSIjMkQyRDJEIi8+CjxwYXRoIGQ9Ik0xNiA4VjE2SDhMMTYgOFoiIGZpbGw9IiMyRDJEMkQiLz4KPC9zdmc+Cg==");background-size: auto;background-repeat: unset;color: 
#35373e;width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;align-items: center;justify-content: center;display: flex;", - }, - "childNodes": [ - { - "id": 100, - "textContent": "hello", - "type": 3, - }, - ], - "id": 12365, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs progress rating 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 150, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 149, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12365, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 148, - "style": "position: relative;display: flex;flex-direction: row;padding: 2px 4px;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 100, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 101, - }, - "childNodes": [ - { - "id": 103, - "textContent": "filled star", - "type": 3, - }, - ], - "id": 101, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - { - "attributes": { - "d": "M12,17.27L18.18,21L16.54,13.97L22,9.24L14.81,8.62L12,2L9.19,8.62L2,9.24L7.45,13.97L5.82,21L12,17.27Z", - "data-rrweb-id": 102, - }, - "childNodes": [], - "id": 102, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - ], - "id": 100, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 104, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 105, - }, - "childNodes": [ - { - "id": 107, - "textContent": "filled star", - "type": 3, - }, - ], - "id": 105, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - { - "attributes": { - "d": "M12,17.27L18.18,21L16.54,13.97L22,9.24L14.81,8.62L12,2L9.19,8.62L2,9.24L7.45,13.97L5.82,21L12,17.27Z", - 
"data-rrweb-id": 106, - }, - "childNodes": [], - "id": 106, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - ], - "id": 104, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 108, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 109, - }, - "childNodes": [ - { - "id": 111, - "textContent": "filled star", - "type": 3, - }, - ], - "id": 109, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - { - "attributes": { - "d": "M12,17.27L18.18,21L16.54,13.97L22,9.24L14.81,8.62L12,2L9.19,8.62L2,9.24L7.45,13.97L5.82,21L12,17.27Z", - "data-rrweb-id": 110, - }, - "childNodes": [], - "id": 110, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - ], - "id": 108, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 112, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 113, - }, - "childNodes": [ - { - "id": 115, - "textContent": "filled star", - "type": 3, - }, - ], - "id": 113, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - { - "attributes": { - "d": "M12,17.27L18.18,21L16.54,13.97L22,9.24L14.81,8.62L12,2L9.19,8.62L2,9.24L7.45,13.97L5.82,21L12,17.27Z", - "data-rrweb-id": 114, - }, - "childNodes": [], - "id": 114, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - ], - "id": 112, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 116, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 117, - }, - "childNodes": [ - { - "id": 119, - "textContent": "filled star", - "type": 3, - }, - ], - "id": 117, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - { - "attributes": { - "d": "M12,17.27L18.18,21L16.54,13.97L22,9.24L14.81,8.62L12,2L9.19,8.62L2,9.24L7.45,13.97L5.82,21L12,17.27Z", - "data-rrweb-id": 118, - }, - "childNodes": [], - "id": 118, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - ], - "id": 116, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 120, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 121, - }, - "childNodes": [ - { - "id": 123, - "textContent": "filled star", - "type": 3, - }, - ], - "id": 121, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - { - "attributes": { - "d": "M12,17.27L18.18,21L16.54,13.97L22,9.24L14.81,8.62L12,2L9.19,8.62L2,9.24L7.45,13.97L5.82,21L12,17.27Z", - "data-rrweb-id": 122, - }, - "childNodes": [], - "id": 122, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - ], - "id": 120, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 124, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 125, - }, - "childNodes": [ - { - "id": 127, - "textContent": "half-filled star", - "type": 3, - }, - ], - "id": 125, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - { - 
"attributes": { - "d": "M12,15.4V6.1L13.71,10.13L18.09,10.5L14.77,13.39L15.76,17.67M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z", - "data-rrweb-id": 126, - }, - "childNodes": [], - "id": 126, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - ], - "id": 124, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 128, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 129, - }, - "childNodes": [ - { - "id": 131, - "textContent": "empty star", - "type": 3, - }, - ], - "id": 129, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - { - "attributes": { - "d": "M12,15.39L8.24,17.66L9.23,13.38L5.91,10.5L10.29,10.13L12,6.09L13.71,10.13L18.09,10.5L14.77,13.38L15.76,17.66M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z", - "data-rrweb-id": 130, - }, - "childNodes": [], - "id": 130, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - ], - "id": 128, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 132, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 133, - }, - "childNodes": [ - { - "id": 135, - "textContent": "empty star", - "type": 3, - }, - ], - "id": 133, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - { - "attributes": { - "d": "M12,15.39L8.24,17.66L9.23,13.38L5.91,10.5L10.29,10.13L12,6.09L13.71,10.13L18.09,10.5L14.77,13.38L15.76,17.66M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z", - "data-rrweb-id": 134, - }, - "childNodes": [], - "id": 134, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - ], - "id": 132, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 136, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 137, - }, - "childNodes": [ - { - "id": 139, - "textContent": "empty star", - "type": 3, - }, - ], - "id": 137, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - { - "attributes": { - "d": "M12,15.39L8.24,17.66L9.23,13.38L5.91,10.5L10.29,10.13L12,6.09L13.71,10.13L18.09,10.5L14.77,13.38L15.76,17.66M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z", - "data-rrweb-id": 138, - }, - "childNodes": [], - "id": 138, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - ], - "id": 136, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 140, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 141, - }, - "childNodes": [ - { - "id": 143, - "textContent": "empty star", - "type": 3, - }, - ], - "id": 141, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - { - "attributes": { - "d": "M12,15.39L8.24,17.66L9.23,13.38L5.91,10.5L10.29,10.13L12,6.09L13.71,10.13L18.09,10.5L14.77,13.38L15.76,17.66M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z", - 
"data-rrweb-id": 142, - }, - "childNodes": [], - "id": 142, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - ], - "id": 140, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 144, - "fill": "currentColor", - "style": "height: 100%;overflow-clip-margin: content-box;overflow:hidden;", - "viewBox": "0 0 24 24", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 145, - }, - "childNodes": [ - { - "id": 147, - "textContent": "empty star", - "type": 3, - }, - ], - "id": 145, - "isSVG": true, - "tagName": "title", - "type": 2, - }, - { - "attributes": { - "d": "M12,15.39L8.24,17.66L9.23,13.38L5.91,10.5L10.29,10.13L12,6.09L13.71,10.13L18.09,10.5L14.77,13.38L15.76,17.66M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z", - "data-rrweb-id": 146, - }, - "childNodes": [], - "id": 146, - "isSVG": true, - "tagName": "path", - "type": 2, - }, - ], - "id": 144, - "isSVG": true, - "tagName": "svg", - "type": 2, - }, - ], - "id": 148, - "tagName": "div", - "type": 2, - }, - ], - "id": 12365, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs radio group - $inputType - $value 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs 
radio_group - $inputType - $value 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 123123, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - }, - "childNodes": [], - "id": 123123, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs radio_group 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 101, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 100, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 54321, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - }, - "childNodes": [], - "id": 54321, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - 
"tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs web_view - $inputType - $value 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 102, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 101, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12365, - "style": "background-color: #f3f4ef;background-image: url("data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTYiIGhlaWdodD0iMTYiIHZpZXdCb3g9IjAgMCAxNiAxNiIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KPHJlY3Qgd2lkdGg9IjE2IiBoZWlnaHQ9IjE2IiBmaWxsPSJibGFjayIvPgo8cGF0aCBkPSJNOCAwSDE2TDAgMTZWOEw4IDBaIiBmaWxsPSIjMkQyRDJEIi8+CjxwYXRoIGQ9Ik0xNiA4VjE2SDhMMTYgOFoiIGZpbGw9IiMyRDJEMkQiLz4KPC9zdmc+Cg==");background-size: auto;background-repeat: unset;color: #35373e;width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;align-items: center;justify-content: center;display: flex;", - }, - "childNodes": [ - { - "id": 100, - "textContent": "web_view", - "type": 3, - }, - ], - "id": 12365, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs web_view with URL 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 102, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - 
], - "id": 101, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12365, - "style": "background-color: #f3f4ef;background-image: url("data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTYiIGhlaWdodD0iMTYiIHZpZXdCb3g9IjAgMCAxNiAxNiIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KPHJlY3Qgd2lkdGg9IjE2IiBoZWlnaHQ9IjE2IiBmaWxsPSJibGFjayIvPgo8cGF0aCBkPSJNOCAwSDE2TDAgMTZWOEw4IDBaIiBmaWxsPSIjMkQyRDJEIi8+CjxwYXRoIGQ9Ik0xNiA4VjE2SDhMMTYgOFoiIGZpbGw9IiMyRDJEMkQiLz4KPC9zdmc+Cg==");background-size: auto;background-repeat: unset;color: #35373e;width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;align-items: center;justify-content: center;display: flex;", - }, - "childNodes": [ - { - "id": 100, - "textContent": "https://example.com", - "type": 3, - }, - ], - "id": 12365, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform inputs wrapping with labels 1`] = ` -{ - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 103, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 102, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 101, - "style": "width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12359, - "style": null, - "type": "checkbox", - }, - "childNodes": [], - "id": 12359, - "tagName": "input", - "type": 2, - }, - { - "id": 100, - "textContent": "i will wrap the checkbox", - "type": 3, - }, - ], - "id": 101, - "tagName": "label", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 
11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, -} -`; - -exports[`replay/transform transform omitting x and y is equivalent to setting them to 0 1`] = ` -[ - { - "data": { - "initialOffset": { - "left": 0, - "top": 0, - }, - "node": { - "childNodes": [ - { - "id": 2, - "name": "html", - "publicId": "", - "systemId": "", - "type": 1, - }, - { - "attributes": { - "data-rrweb-id": 3, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 4, - }, - "childNodes": [ - { - "attributes": { - "type": "text/css", - }, - "childNodes": [ - { - "id": 102, - "textContent": " - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - ", - "type": 3, - }, - ], - "id": 101, - "tagName": "style", - "type": 2, - }, - ], - "id": 4, - "tagName": "head", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 5, - "style": "height: 100vh; width: 100vw;", - }, - "childNodes": [ - { - "attributes": { - "data-rrweb-id": 12345, - "style": "background-color: #f3f4ef;background-image: url("data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTYiIGhlaWdodD0iMTYiIHZpZXdCb3g9IjAgMCAxNiAxNiIgZmlsbD0ibm9uZSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KPHJlY3Qgd2lkdGg9IjE2IiBoZWlnaHQ9IjE2IiBmaWxsPSJibGFjayIvPgo8cGF0aCBkPSJNOCAwSDE2TDAgMTZWOEw4IDBaIiBmaWxsPSIjMkQyRDJEIi8+CjxwYXRoIGQ9Ik0xNiA4VjE2SDhMMTYgOFoiIGZpbGw9IiMyRDJEMkQiLz4KPC9zdmc+Cg==");background-size: auto;background-repeat: unset;color: #35373e;width: 100px;height: 30px;position: fixed;left: 0px;top: 0px;align-items: center;justify-content: center;display: flex;", - }, - "childNodes": [ - { - "id": 100, - "textContent": "image", - "type": 3, - }, - ], - "id": 12345, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", - "data-rrweb-id": 9, - }, - "childNodes": [], - "id": 9, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 7, - }, - "childNodes": [], - "id": 7, - "tagName": "div", - "type": 2, - }, - { - "attributes": { - "data-rrweb-id": 11, - }, - "childNodes": [], - "id": 11, - "tagName": "div", - "type": 2, - }, - ], - "id": 5, - "tagName": "body", - "type": 2, - }, - ], - "id": 3, - "tagName": "html", - "type": 2, - }, - ], - "id": 1, - "type": 0, - }, - }, - "timestamp": 1, - "type": 2, - }, -] -`; - -exports[`replay/transform transform respect incremental ids, replace with body otherwise 1`] = ` -[ - { - "data": { - "id": 5, - "pointerType": 2, - "source": 2, - "type": 7, - "x": 523, - "y": 683, - }, - "delay": 2160, - "timestamp": 1701355473313, - "type": 3, - "windowId": "ddc9c89d-2272-4b07-a280-c00db3a9182f", - }, - { - "data": { - "id": 145, - "pointerType": 2, - "source": 2, - "type": 7, - "x": 523, - "y": 683, - }, - "delay": 2160, - "timestamp": 1701355473313, - "type": 3, - "windowId": "ddc9c89d-2272-4b07-a280-c00db3a9182f", - }, -] -`; diff --git a/ee/frontend/mobile-replay/index.ts b/ee/frontend/mobile-replay/index.ts deleted file mode 100644 index 56a7d2ee45..0000000000 --- a/ee/frontend/mobile-replay/index.ts +++ /dev/null @@ -1,82 +0,0 @@ 
-import { eventWithTime } from '@rrweb/types' -import { captureException, captureMessage } from '@sentry/react' -import Ajv, { ErrorObject } from 'ajv' - -import { mobileEventWithTime } from './mobile.types' -import mobileSchema from './schema/mobile/rr-mobile-schema.json' -import webSchema from './schema/web/rr-web-schema.json' -import { makeCustomEvent, makeFullEvent, makeIncrementalEvent, makeMetaEvent } from './transformer/transformers' - -const ajv = new Ajv({ - allowUnionTypes: true, -}) // options can be passed, e.g. {allErrors: true} - -const transformers: Record<number, (x: any) => eventWithTime> = { - 2: makeFullEvent, - 3: makeIncrementalEvent, - 4: makeMetaEvent, - 5: makeCustomEvent, -} - -const mobileSchemaValidator = ajv.compile(mobileSchema) - -export function validateFromMobile(data: unknown): { - isValid: boolean - errors: ErrorObject[] | null | undefined -} { - const isValid = mobileSchemaValidator(data) - return { - isValid, - errors: isValid ? null : mobileSchemaValidator.errors, - } -} - -const webSchemaValidator = ajv.compile(webSchema) - -function couldBeEventWithTime(x: unknown): x is eventWithTime | mobileEventWithTime { - return typeof x === 'object' && x !== null && 'type' in x && 'timestamp' in x -} - -export function transformEventToWeb(event: unknown, validateTransformation?: boolean): eventWithTime { - // the transformation needs to never break a recording itself - // so, we default to returning what we received - // replacing it only if there's a valid transformation - let result = event as eventWithTime - try { - if (couldBeEventWithTime(event)) { - const transformer = transformers[event.type] - if (transformer) { - const transformed = transformer(event) - if (validateTransformation) { - validateAgainstWebSchema(transformed) - } - result = transformed - } - } else { - captureMessage(`No type in event`, { extra: { event } }) - } - } catch (e) { - captureException(e, { extra: { event } }) - } - return result -} - -export function transformToWeb(mobileData: (eventWithTime | mobileEventWithTime)[]): eventWithTime[] { - return mobileData.reduce((acc, event) => { - const transformed = transformEventToWeb(event) - acc.push(transformed ? 
transformed : (event as eventWithTime)) - return acc - }, [] as eventWithTime[]) -} - -export function validateAgainstWebSchema(data: unknown): boolean { - const validationResult = webSchemaValidator(data) - if (!validationResult) { - // we are passing all data through this validation now and don't know how safe the schema is - captureMessage('transformation did not match schema', { - extra: { data, errors: webSchemaValidator.errors }, - }) - } - - return validationResult -} diff --git a/ee/frontend/mobile-replay/mobile.types.ts b/ee/frontend/mobile-replay/mobile.types.ts deleted file mode 100644 index 7e18622fa8..0000000000 --- a/ee/frontend/mobile-replay/mobile.types.ts +++ /dev/null @@ -1,406 +0,0 @@ -// copied from rrweb-snapshot, not included in rrweb types -import { customEvent, EventType, IncrementalSource, removedNodeMutation } from '@rrweb/types' - -export enum NodeType { - Document = 0, - DocumentType = 1, - Element = 2, - Text = 3, - CDATA = 4, - Comment = 5, -} - -export type documentNode = { - type: NodeType.Document - childNodes: serializedNodeWithId[] - compatMode?: string -} - -export type documentTypeNode = { - type: NodeType.DocumentType - name: string - publicId: string - systemId: string -} - -export type attributes = { - [key: string]: string | number | true | null -} - -export type elementNode = { - type: NodeType.Element - tagName: string - attributes: attributes - childNodes: serializedNodeWithId[] - isSVG?: true - needBlock?: boolean - // This is a custom element or not. - isCustom?: true -} - -export type textNode = { - type: NodeType.Text - textContent: string - isStyle?: true -} - -export type cdataNode = { - type: NodeType.CDATA - textContent: '' -} - -export type commentNode = { - type: NodeType.Comment - textContent: string -} - -export type serializedNode = (documentNode | documentTypeNode | elementNode | textNode | cdataNode | commentNode) & { - rootId?: number - isShadowHost?: boolean - isShadow?: boolean -} - -export type serializedNodeWithId = serializedNode & { id: number } - -// end copied section - -export type MobileNodeType = - | 'text' - | 'image' - | 'screenshot' - | 'rectangle' - | 'placeholder' - | 'web_view' - | 'input' - | 'div' - | 'radio_group' - | 'status_bar' - | 'navigation_bar' - -export type MobileStyles = { - /** - * @description maps to CSS color. Accepts any valid CSS color value. Expects a #RGB value e.g. #000 or #000000 - */ - color?: string - /** - * @description maps to CSS background-color. Accepts any valid CSS color value. Expects a #RGB value e.g. #000 or #000000 - */ - backgroundColor?: string - /** - * @description if provided this will be used as a base64 encoded image source for the backgroundImage css property, with no other attributes it is assumed to be a PNG - */ - backgroundImage?: string - /** - * @description can be used alongside the background image property to specify how the image is rendered. Accepts a subset of the valid values for CSS background-size property. 
If not provided (and backgroundImage is present) defaults to 'auto' - */ - backgroundSize?: 'contain' | 'cover' | 'auto' - /** - * @description if borderWidth is present, then border style is assumed to be solid - */ - borderWidth?: string | number - /** - * @description if borderRadius is present, then border style is assumed to be solid - */ - borderRadius?: string | number - /** - * @description if borderColor is present, then border style is assumed to be solid - */ - borderColor?: string - /** - * @description vertical alignment with respect to its parent - */ - verticalAlign?: 'top' | 'bottom' | 'center' - /** - * @description horizontal alignment with respect to its parent - */ - horizontalAlign?: 'left' | 'right' | 'center' - /** - * @description maps to CSS font-size. Accepts any valid CSS font-size value. Expects a number (treated as pixels) or a string that is a number followed by px e.g. 16px - */ - fontSize?: string | number - /** - * @description maps to CSS font-family. Accepts any valid CSS font-family value. - */ - fontFamily?: string - /** - * @description maps to CSS padding-left. Expects a number (treated as pixels) or a string that is a number followed by px e.g. 16px - */ - paddingLeft?: string | number - /** - * @description maps to CSS padding-right. Expects a number (treated as pixels) or a string that is a number followed by px e.g. 16px - */ - paddingRight?: string | number - /** - * @description maps to CSS padding-top. Expects a number (treated as pixels) or a string that is a number followed by px e.g. 16px - */ - paddingTop?: string | number - /** - * @description maps to CSS padding-bottom. Expects a number (treated as pixels) or a string that is a number followed by px e.g. 16px - */ - paddingBottom?: string | number -} - -type wireframeBase = { - id: number - /** - * @description x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present this is equivalent to setting them to 0 - */ - x?: number - y?: number - /* - * @description the width dimension of the element, either '100vw' i.e. viewport width. Or a value in pixels. You can omit the unit when specifying pixels. - */ - width: number | '100vw' - /* - * @description the height dimension of the element, the only accepted units is pixels. You can omit the unit. - */ - height: number - childWireframes?: wireframe[] - type: MobileNodeType - style?: MobileStyles -} - -export type wireframeInputBase = wireframeBase & { - type: 'input' - /** - * @description for several attributes we technically only care about true or absent as values. They are represented as bare attributes in HTML <input disabled>. When true that attribute is added to the HTML element, when absent that attribute is not added to the HTML element. When false or absent they are not added to the element. - */ - disabled: boolean -} - -export type wireframeCheckBox = wireframeInputBase & { - inputType: 'checkbox' - /** - * @description for several attributes we technically only care about true or absent as values. They are represented as bare attributes in HTML <input checked>. When true that attribute is added to the HTML element, when absent that attribute is not added to the HTML element. When false or absent they are not added to the element. 
- */ - checked: boolean - label?: string -} - -export type wireframeToggle = wireframeInputBase & { - inputType: 'toggle' - checked: boolean - label?: string -} - -export type wireframeRadioGroup = wireframeBase & { - type: 'radio_group' -} - -export type wireframeRadio = wireframeInputBase & { - inputType: 'radio' - /** - * @description for several attributes we technically only care about true or absent as values. They are represented as bare attributes in HTML <input checked>. When true that attribute is added to the HTML element, when absent that attribute is not added to the HTML element. When false or absent they are not added to the element. - */ - checked: boolean - label?: string -} - -export type wireframeInput = wireframeInputBase & { - inputType: 'text' | 'password' | 'email' | 'number' | 'search' | 'tel' | 'url' - value?: string -} - -export type wireframeSelect = wireframeInputBase & { - inputType: 'select' - value?: string - options?: string[] -} - -export type wireframeTextArea = wireframeInputBase & { - inputType: 'text_area' - value?: string -} - -export type wireframeButton = wireframeInputBase & { - inputType: 'button' - /** - * @description this is the text that is displayed on the button, if not sent then you must send childNodes with the button content - */ - value?: string -} - -export type wireframeProgress = wireframeInputBase & { - inputType: 'progress' - /** - * @description This attribute specifies how much of the task that has been completed. It must be a valid floating point number between 0 and max, or between 0 and 1 if max is omitted. If there is no value attribute, the progress bar is indeterminate; this indicates that an activity is ongoing with no indication of how long it is expected to take. When bar style is rating this is the number of filled stars. - */ - value?: number - /** - * @description The max attribute, if present, must have a value greater than 0 and be a valid floating point number. The default value is 1. When bar style is rating this is the number of stars. 
- */ - max?: number - style?: MobileStyles & { - bar: 'horizontal' | 'circular' | 'rating' - } -} - -// these are grouped as a type so that we can easily use them as function parameters -export type wireframeInputComponent = - | wireframeCheckBox - | wireframeRadio - | wireframeInput - | wireframeSelect - | wireframeTextArea - | wireframeButton - | wireframeProgress - | wireframeToggle - -export type wireframeText = wireframeBase & { - type: 'text' - text: string -} - -export type wireframeImage = wireframeBase & { - type: 'image' - /** - * @description this will be used as base64 encoded image source, with no other attributes it is assumed to be a PNG, if omitted a placeholder is rendered - */ - base64?: string -} - -/** - * @description a screenshot behaves exactly like an image, but it is expected to be a screenshot of the screen at the time of the event, when sent as a mutation it must always attached to the root of the playback, when sent as an initial snapshot it must be sent as the first or only snapshot so that it attaches to the body of the playback - */ -export type wireframeScreenshot = wireframeImage & { - type: 'screenshot' -} - -export type wireframeRectangle = wireframeBase & { - type: 'rectangle' -} - -export type wireframeWebView = wireframeBase & { - type: 'web_view' - url?: string -} - -export type wireframePlaceholder = wireframeBase & { - type: 'placeholder' - label?: string -} - -export type wireframeDiv = wireframeBase & { - /* - * @description this is the default type, if no type is specified then it is assumed to be a div - */ - type: 'div' -} - -/** - * @description the status bar respects styling and positioning, but it is expected to be at the top of the screen with limited styling and no child elements - */ -export type wireframeStatusBar = wireframeBase & { - type: 'status_bar' -} - -/** - * @description the navigation bar respects styling and positioning, but it is expected to be at the bottom of the screen with limited styling and no child elements - */ -export type wireframeNavigationBar = wireframeBase & { - type: 'navigation_bar' -} - -export type wireframe = - | wireframeText - | wireframeImage - | wireframeScreenshot - | wireframeRectangle - | wireframeDiv - | wireframeInputComponent - | wireframeRadioGroup - | wireframeWebView - | wireframePlaceholder - | wireframeStatusBar - | wireframeNavigationBar - -// the rrweb full snapshot event type, but it contains wireframes not html -export type fullSnapshotEvent = { - type: EventType.FullSnapshot - data: { - /** - * @description This mimics the RRWeb full snapshot event type, except instead of reporting a serialized DOM it reports a wireframe representation of the screen. 
- */ - wireframes: wireframe[] - initialOffset: { - top: number - left: number - } - } -} - -export type incrementalSnapshotEvent = - | { - type: EventType.IncrementalSnapshot - data: any // keeps a loose incremental type so that we can accept any rrweb incremental snapshot event type - } - | MobileIncrementalSnapshotEvent - -export type MobileNodeMutation = { - parentId: number - wireframe: wireframe -} - -export type MobileNodeMutationData = { - source: IncrementalSource.Mutation - /** - * @description An update is implemented as a remove and then an add, so the updates array contains the ID of the removed node and the wireframe for the added node - */ - updates?: MobileNodeMutation[] - adds?: MobileNodeMutation[] - /** - * @description A mobile remove is identical to a web remove - */ - removes?: removedNodeMutation[] -} - -export type MobileIncrementalSnapshotEvent = { - type: EventType.IncrementalSnapshot - /** - * @description This sits alongside the RRWeb incremental snapshot event type, mobile replay can send any of the RRWeb incremental snapshot event types, which will be passed unchanged to the player - for example to send touch events. removed node mutations are passed unchanged to the player. - */ - data: MobileNodeMutationData -} - -export type metaEvent = { - type: EventType.Meta - data: { - href?: string - width: number - height: number - } -} - -// this is a custom event _but_ rrweb only types tag as string, and we want to be more specific -export type keyboardEvent = { - type: EventType.Custom - data: { - tag: 'keyboard' - payload: - | { - open: true - styles?: MobileStyles - /** - * @description x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present then the keyboard is at the bottom of the screen - */ - x?: number - y?: number - /* - * @description the height dimension of the keyboard, the only accepted units is pixels. You can omit the unit. - */ - height: number - /* - * @description the width dimension of the keyboard, the only accepted units is pixels. You can omit the unit. 
If not present defaults to width of the viewport - */ - width?: number - } - | { open: false } - } -} - -export type mobileEvent = fullSnapshotEvent | metaEvent | customEvent | incrementalSnapshotEvent | keyboardEvent - -export type mobileEventWithTime = mobileEvent & { - timestamp: number - delay?: number -} diff --git a/ee/frontend/mobile-replay/parsing.test.ts b/ee/frontend/mobile-replay/parsing.test.ts deleted file mode 100644 index 5d913b4117..0000000000 --- a/ee/frontend/mobile-replay/parsing.test.ts +++ /dev/null @@ -1,20 +0,0 @@ -import { parseEncodedSnapshots } from 'scenes/session-recordings/player/sessionRecordingDataLogic' - -import { encodedWebSnapshotData } from './__mocks__/encoded-snapshot-data' - -describe('snapshot parsing', () => { - const sessionId = '12345' - const numberOfParsedLinesInData = 3 - - it('handles normal mobile data', async () => { - const parsed = await parseEncodedSnapshots(encodedWebSnapshotData, sessionId, true) - expect(parsed.length).toEqual(numberOfParsedLinesInData) - expect(parsed).toMatchSnapshot() - }) - it('handles mobile data with no meta event', async () => { - const withoutMeta = [encodedWebSnapshotData[0], encodedWebSnapshotData[2]] - const parsed = await parseEncodedSnapshots(withoutMeta, sessionId, true) - expect(parsed.length).toEqual(numberOfParsedLinesInData) - expect(parsed).toMatchSnapshot() - }) -}) diff --git a/ee/frontend/mobile-replay/schema/mobile/rr-mobile-schema.json b/ee/frontend/mobile-replay/schema/mobile/rr-mobile-schema.json deleted file mode 100644 index 55ce111b3a..0000000000 --- a/ee/frontend/mobile-replay/schema/mobile/rr-mobile-schema.json +++ /dev/null @@ -1,1349 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "anyOf": [ - { - "additionalProperties": false, - "properties": { - "data": { - "additionalProperties": false, - "properties": { - "initialOffset": { - "additionalProperties": false, - "properties": { - "left": { - "type": "number" - }, - "top": { - "type": "number" - } - }, - "required": ["top", "left"], - "type": "object" - }, - "wireframes": { - "description": "This mimics the RRWeb full snapshot event type, except instead of reporting a serialized DOM it reports a wireframe representation of the screen.", - "items": { - "$ref": "#/definitions/wireframe" - }, - "type": "array" - } - }, - "required": ["wireframes", "initialOffset"], - "type": "object" - }, - "delay": { - "type": "number" - }, - "timestamp": { - "type": "number" - }, - "type": { - "$ref": "#/definitions/EventType.FullSnapshot" - } - }, - "required": ["data", "timestamp", "type"], - "type": "object" - }, - { - "additionalProperties": false, - "properties": { - "data": { - "additionalProperties": false, - "properties": { - "height": { - "type": "number" - }, - "href": { - "type": "string" - }, - "width": { - "type": "number" - } - }, - "required": ["width", "height"], - "type": "object" - }, - "delay": { - "type": "number" - }, - "timestamp": { - "type": "number" - }, - "type": { - "$ref": "#/definitions/EventType.Meta" - } - }, - "required": ["data", "timestamp", "type"], - "type": "object" - }, - { - "additionalProperties": false, - "properties": { - "data": { - "additionalProperties": false, - "properties": { - "payload": {}, - "tag": { - "type": "string" - } - }, - "required": ["tag", "payload"], - "type": "object" - }, - "delay": { - "type": "number" - }, - "timestamp": { - "type": "number" - }, - "type": { - "$ref": "#/definitions/EventType.Custom" - } - }, - "required": ["data", "timestamp", "type"], - "type": 
"object" - }, - { - "additionalProperties": false, - "properties": { - "data": {}, - "delay": { - "type": "number" - }, - "timestamp": { - "type": "number" - }, - "type": { - "$ref": "#/definitions/EventType.IncrementalSnapshot" - } - }, - "required": ["data", "timestamp", "type"], - "type": "object" - }, - { - "additionalProperties": false, - "properties": { - "data": { - "$ref": "#/definitions/MobileNodeMutationData", - "description": "This sits alongside the RRWeb incremental snapshot event type, mobile replay can send any of the RRWeb incremental snapshot event types, which will be passed unchanged to the player - for example to send touch events. removed node mutations are passed unchanged to the player." - }, - "delay": { - "type": "number" - }, - "timestamp": { - "type": "number" - }, - "type": { - "$ref": "#/definitions/EventType.IncrementalSnapshot" - } - }, - "required": ["data", "timestamp", "type"], - "type": "object" - }, - { - "additionalProperties": false, - "properties": { - "data": { - "additionalProperties": false, - "properties": { - "payload": { - "anyOf": [ - { - "additionalProperties": false, - "properties": { - "height": { - "type": "number" - }, - "open": { - "const": true, - "type": "boolean" - }, - "styles": { - "$ref": "#/definitions/MobileStyles" - }, - "width": { - "type": "number" - }, - "x": { - "description": "x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present then the keyboard is at the bottom of the screen", - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["open", "height"], - "type": "object" - }, - { - "additionalProperties": false, - "properties": { - "open": { - "const": false, - "type": "boolean" - } - }, - "required": ["open"], - "type": "object" - } - ] - }, - "tag": { - "const": "keyboard", - "type": "string" - } - }, - "required": ["tag", "payload"], - "type": "object" - }, - "delay": { - "type": "number" - }, - "timestamp": { - "type": "number" - }, - "type": { - "$ref": "#/definitions/EventType.Custom" - } - }, - "required": ["data", "timestamp", "type"], - "type": "object" - } - ], - "definitions": { - "EventType.Custom": { - "const": 5, - "type": "number" - }, - "EventType.FullSnapshot": { - "const": 2, - "type": "number" - }, - "EventType.IncrementalSnapshot": { - "const": 3, - "type": "number" - }, - "EventType.Meta": { - "const": 4, - "type": "number" - }, - "IncrementalSource.Mutation": { - "const": 0, - "type": "number" - }, - "MobileNodeMutation": { - "additionalProperties": false, - "properties": { - "parentId": { - "type": "number" - }, - "wireframe": { - "$ref": "#/definitions/wireframe" - } - }, - "required": ["parentId", "wireframe"], - "type": "object" - }, - "MobileNodeMutationData": { - "additionalProperties": false, - "properties": { - "adds": { - "items": { - "$ref": "#/definitions/MobileNodeMutation" - }, - "type": "array" - }, - "removes": { - "description": "A mobile remove is identical to a web remove", - "items": { - "$ref": "#/definitions/removedNodeMutation" - }, - "type": "array" - }, - "source": { - "$ref": "#/definitions/IncrementalSource.Mutation" - }, - "updates": { - "description": "An update is implemented as a remove and then an add, so the updates array contains the ID of the removed node and the wireframe for the added node", - "items": { - "$ref": "#/definitions/MobileNodeMutation" - }, - "type": "array" - } - }, - "required": ["source"], - "type": "object" - }, - "MobileNodeType": { - "enum": [ - 
"text", - "image", - "screenshot", - "rectangle", - "placeholder", - "web_view", - "input", - "div", - "radio_group", - "status_bar", - "navigation_bar" - ], - "type": "string" - }, - "MobileStyles": { - "additionalProperties": false, - "properties": { - "backgroundColor": { - "description": "maps to CSS background-color. Accepts any valid CSS color value. Expects a #RGB value e.g. #000 or #000000", - "type": "string" - }, - "backgroundImage": { - "description": "if provided this will be used as a base64 encoded image source for the backgroundImage css property, with no other attributes it is assumed to be a PNG", - "type": "string" - }, - "backgroundSize": { - "description": "can be used alongside the background image property to specify how the image is rendered. Accepts a subset of the valid values for CSS background-size property. If not provided (and backgroundImage is present) defaults to 'auto'", - "enum": ["contain", "cover", "auto"], - "type": "string" - }, - "borderColor": { - "description": "if borderColor is present, then border style is assumed to be solid", - "type": "string" - }, - "borderRadius": { - "description": "if borderRadius is present, then border style is assumed to be solid", - "type": ["string", "number"] - }, - "borderWidth": { - "description": "if borderWidth is present, then border style is assumed to be solid", - "type": ["string", "number"] - }, - "color": { - "description": "maps to CSS color. Accepts any valid CSS color value. Expects a #RGB value e.g. #000 or #000000", - "type": "string" - }, - "fontFamily": { - "description": "maps to CSS font-family. Accepts any valid CSS font-family value.", - "type": "string" - }, - "fontSize": { - "description": "maps to CSS font-size. Accepts any valid CSS font-size value. Expects a number (treated as pixels) or a string that is a number followed by px e.g. 16px", - "type": ["string", "number"] - }, - "horizontalAlign": { - "description": "horizontal alignment with respect to its parent", - "enum": ["left", "right", "center"], - "type": "string" - }, - "paddingBottom": { - "description": "maps to CSS padding-bottom. Expects a number (treated as pixels) or a string that is a number followed by px e.g. 16px", - "type": ["string", "number"] - }, - "paddingLeft": { - "description": "maps to CSS padding-left. Expects a number (treated as pixels) or a string that is a number followed by px e.g. 16px", - "type": ["string", "number"] - }, - "paddingRight": { - "description": "maps to CSS padding-right. Expects a number (treated as pixels) or a string that is a number followed by px e.g. 16px", - "type": ["string", "number"] - }, - "paddingTop": { - "description": "maps to CSS padding-top. Expects a number (treated as pixels) or a string that is a number followed by px e.g. 
16px", - "type": ["string", "number"] - }, - "verticalAlign": { - "description": "vertical alignment with respect to its parent", - "enum": ["top", "bottom", "center"], - "type": "string" - } - }, - "type": "object" - }, - "removedNodeMutation": { - "additionalProperties": false, - "properties": { - "id": { - "type": "number" - }, - "isShadow": { - "type": "boolean" - }, - "parentId": { - "type": "number" - } - }, - "required": ["parentId", "id"], - "type": "object" - }, - "wireframe": { - "anyOf": [ - { - "$ref": "#/definitions/wireframeText" - }, - { - "$ref": "#/definitions/wireframeImage" - }, - { - "$ref": "#/definitions/wireframeScreenshot" - }, - { - "$ref": "#/definitions/wireframeRectangle" - }, - { - "$ref": "#/definitions/wireframeDiv" - }, - { - "$ref": "#/definitions/wireframeInputComponent" - }, - { - "$ref": "#/definitions/wireframeRadioGroup" - }, - { - "$ref": "#/definitions/wireframeWebView" - }, - { - "$ref": "#/definitions/wireframePlaceholder" - }, - { - "$ref": "#/definitions/wireframeStatusBar" - }, - { - "$ref": "#/definitions/wireframeNavigationBar" - } - ] - }, - "wireframeButton": { - "additionalProperties": false, - "properties": { - "childWireframes": { - "items": { - "$ref": "#/definitions/wireframe" - }, - "type": "array" - }, - "disabled": { - "description": "for several attributes we technically only care about true or absent as values. They are represented as bare attributes in HTML <input disabled>. When true that attribute is added to the HTML element, when absent that attribute is not added to the HTML element. When false or absent they are not added to the element.", - "type": "boolean" - }, - "height": { - "type": "number" - }, - "id": { - "type": "number" - }, - "inputType": { - "const": "button", - "type": "string" - }, - "style": { - "$ref": "#/definitions/MobileStyles" - }, - "type": { - "$ref": "#/definitions/MobileNodeType" - }, - "value": { - "description": "this is the text that is displayed on the button, if not sent then you must send childNodes with the button content", - "type": "string" - }, - "width": { - "anyOf": [ - { - "type": "number" - }, - { - "const": "100vw", - "type": "string" - } - ] - }, - "x": { - "description": "x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present this is equivalent to setting them to 0", - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["disabled", "height", "id", "inputType", "type", "width"], - "type": "object" - }, - "wireframeCheckBox": { - "additionalProperties": false, - "properties": { - "checked": { - "description": "for several attributes we technically only care about true or absent as values. They are represented as bare attributes in HTML <input checked>. When true that attribute is added to the HTML element, when absent that attribute is not added to the HTML element. When false or absent they are not added to the element.", - "type": "boolean" - }, - "childWireframes": { - "items": { - "$ref": "#/definitions/wireframe" - }, - "type": "array" - }, - "disabled": { - "description": "for several attributes we technically only care about true or absent as values. They are represented as bare attributes in HTML <input disabled>. When true that attribute is added to the HTML element, when absent that attribute is not added to the HTML element. 
When false or absent they are not added to the element.", - "type": "boolean" - }, - "height": { - "type": "number" - }, - "id": { - "type": "number" - }, - "inputType": { - "const": "checkbox", - "type": "string" - }, - "label": { - "type": "string" - }, - "style": { - "$ref": "#/definitions/MobileStyles" - }, - "type": { - "$ref": "#/definitions/MobileNodeType" - }, - "width": { - "anyOf": [ - { - "type": "number" - }, - { - "const": "100vw", - "type": "string" - } - ] - }, - "x": { - "description": "x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present this is equivalent to setting them to 0", - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["checked", "disabled", "height", "id", "inputType", "type", "width"], - "type": "object" - }, - "wireframeDiv": { - "additionalProperties": false, - "properties": { - "childWireframes": { - "items": { - "$ref": "#/definitions/wireframe" - }, - "type": "array" - }, - "height": { - "type": "number" - }, - "id": { - "type": "number" - }, - "style": { - "$ref": "#/definitions/MobileStyles" - }, - "type": { - "$ref": "#/definitions/MobileNodeType" - }, - "width": { - "anyOf": [ - { - "type": "number" - }, - { - "const": "100vw", - "type": "string" - } - ] - }, - "x": { - "description": "x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present this is equivalent to setting them to 0", - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["height", "id", "type", "width"], - "type": "object" - }, - "wireframeImage": { - "additionalProperties": false, - "properties": { - "base64": { - "description": "this will be used as base64 encoded image source, with no other attributes it is assumed to be a PNG, if omitted a placeholder is rendered", - "type": "string" - }, - "childWireframes": { - "items": { - "$ref": "#/definitions/wireframe" - }, - "type": "array" - }, - "height": { - "type": "number" - }, - "id": { - "type": "number" - }, - "style": { - "$ref": "#/definitions/MobileStyles" - }, - "type": { - "$ref": "#/definitions/MobileNodeType" - }, - "width": { - "anyOf": [ - { - "type": "number" - }, - { - "const": "100vw", - "type": "string" - } - ] - }, - "x": { - "description": "x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present this is equivalent to setting them to 0", - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["height", "id", "type", "width"], - "type": "object" - }, - "wireframeInput": { - "additionalProperties": false, - "properties": { - "childWireframes": { - "items": { - "$ref": "#/definitions/wireframe" - }, - "type": "array" - }, - "disabled": { - "description": "for several attributes we technically only care about true or absent as values. They are represented as bare attributes in HTML <input disabled>. When true that attribute is added to the HTML element, when absent that attribute is not added to the HTML element. 
When false or absent they are not added to the element.", - "type": "boolean" - }, - "height": { - "type": "number" - }, - "id": { - "type": "number" - }, - "inputType": { - "enum": ["text", "password", "email", "number", "search", "tel", "url"], - "type": "string" - }, - "style": { - "$ref": "#/definitions/MobileStyles" - }, - "type": { - "$ref": "#/definitions/MobileNodeType" - }, - "value": { - "type": "string" - }, - "width": { - "anyOf": [ - { - "type": "number" - }, - { - "const": "100vw", - "type": "string" - } - ] - }, - "x": { - "description": "x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present this is equivalent to setting them to 0", - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["disabled", "height", "id", "inputType", "type", "width"], - "type": "object" - }, - "wireframeInputComponent": { - "anyOf": [ - { - "$ref": "#/definitions/wireframeCheckBox" - }, - { - "$ref": "#/definitions/wireframeRadio" - }, - { - "$ref": "#/definitions/wireframeInput" - }, - { - "$ref": "#/definitions/wireframeSelect" - }, - { - "$ref": "#/definitions/wireframeTextArea" - }, - { - "$ref": "#/definitions/wireframeButton" - }, - { - "$ref": "#/definitions/wireframeProgress" - }, - { - "$ref": "#/definitions/wireframeToggle" - } - ] - }, - "wireframeNavigationBar": { - "additionalProperties": false, - "description": "the navigation bar respects styling and positioning, but it is expected to be at the bottom of the screen with limited styling and no child elements", - "properties": { - "childWireframes": { - "items": { - "$ref": "#/definitions/wireframe" - }, - "type": "array" - }, - "height": { - "type": "number" - }, - "id": { - "type": "number" - }, - "style": { - "$ref": "#/definitions/MobileStyles" - }, - "type": { - "$ref": "#/definitions/MobileNodeType" - }, - "width": { - "anyOf": [ - { - "type": "number" - }, - { - "const": "100vw", - "type": "string" - } - ] - }, - "x": { - "description": "x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present this is equivalent to setting them to 0", - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["height", "id", "type", "width"], - "type": "object" - }, - "wireframePlaceholder": { - "additionalProperties": false, - "properties": { - "childWireframes": { - "items": { - "$ref": "#/definitions/wireframe" - }, - "type": "array" - }, - "height": { - "type": "number" - }, - "id": { - "type": "number" - }, - "label": { - "type": "string" - }, - "style": { - "$ref": "#/definitions/MobileStyles" - }, - "type": { - "$ref": "#/definitions/MobileNodeType" - }, - "width": { - "anyOf": [ - { - "type": "number" - }, - { - "const": "100vw", - "type": "string" - } - ] - }, - "x": { - "description": "x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present this is equivalent to setting them to 0", - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["height", "id", "type", "width"], - "type": "object" - }, - "wireframeProgress": { - "additionalProperties": false, - "properties": { - "childWireframes": { - "items": { - "$ref": "#/definitions/wireframe" - }, - "type": "array" - }, - "disabled": { - "description": "for several attributes we technically only care about true or absent as values. They are represented as bare attributes in HTML <input disabled>. 
When true that attribute is added to the HTML element, when absent that attribute is not added to the HTML element. When false or absent they are not added to the element.", - "type": "boolean" - }, - "height": { - "type": "number" - }, - "id": { - "type": "number" - }, - "inputType": { - "const": "progress", - "type": "string" - }, - "max": { - "description": "The max attribute, if present, must have a value greater than 0 and be a valid floating point number. The default value is 1. When bar style is rating this is the number of stars.", - "type": "number" - }, - "style": { - "$ref": "#/definitions/MobileStyles" - }, - "type": { - "$ref": "#/definitions/MobileNodeType" - }, - "value": { - "description": "This attribute specifies how much of the task that has been completed. It must be a valid floating point number between 0 and max, or between 0 and 1 if max is omitted. If there is no value attribute, the progress bar is indeterminate; this indicates that an activity is ongoing with no indication of how long it is expected to take. When bar style is rating this is the number of filled stars.", - "type": "number" - }, - "width": { - "anyOf": [ - { - "type": "number" - }, - { - "const": "100vw", - "type": "string" - } - ] - }, - "x": { - "description": "x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present this is equivalent to setting them to 0", - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["disabled", "height", "id", "inputType", "type", "width"], - "type": "object" - }, - "wireframeRadio": { - "additionalProperties": false, - "properties": { - "checked": { - "description": "for several attributes we technically only care about true or absent as values. They are represented as bare attributes in HTML <input checked>. When true that attribute is added to the HTML element, when absent that attribute is not added to the HTML element. When false or absent they are not added to the element.", - "type": "boolean" - }, - "childWireframes": { - "items": { - "$ref": "#/definitions/wireframe" - }, - "type": "array" - }, - "disabled": { - "description": "for several attributes we technically only care about true or absent as values. They are represented as bare attributes in HTML <input disabled>. When true that attribute is added to the HTML element, when absent that attribute is not added to the HTML element. 
When false or absent they are not added to the element.", - "type": "boolean" - }, - "height": { - "type": "number" - }, - "id": { - "type": "number" - }, - "inputType": { - "const": "radio", - "type": "string" - }, - "label": { - "type": "string" - }, - "style": { - "$ref": "#/definitions/MobileStyles" - }, - "type": { - "$ref": "#/definitions/MobileNodeType" - }, - "width": { - "anyOf": [ - { - "type": "number" - }, - { - "const": "100vw", - "type": "string" - } - ] - }, - "x": { - "description": "x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present this is equivalent to setting them to 0", - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["checked", "disabled", "height", "id", "inputType", "type", "width"], - "type": "object" - }, - "wireframeRadioGroup": { - "additionalProperties": false, - "properties": { - "childWireframes": { - "items": { - "$ref": "#/definitions/wireframe" - }, - "type": "array" - }, - "height": { - "type": "number" - }, - "id": { - "type": "number" - }, - "style": { - "$ref": "#/definitions/MobileStyles" - }, - "type": { - "$ref": "#/definitions/MobileNodeType" - }, - "width": { - "anyOf": [ - { - "type": "number" - }, - { - "const": "100vw", - "type": "string" - } - ] - }, - "x": { - "description": "x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present this is equivalent to setting them to 0", - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["height", "id", "type", "width"], - "type": "object" - }, - "wireframeRectangle": { - "additionalProperties": false, - "properties": { - "childWireframes": { - "items": { - "$ref": "#/definitions/wireframe" - }, - "type": "array" - }, - "height": { - "type": "number" - }, - "id": { - "type": "number" - }, - "style": { - "$ref": "#/definitions/MobileStyles" - }, - "type": { - "$ref": "#/definitions/MobileNodeType" - }, - "width": { - "anyOf": [ - { - "type": "number" - }, - { - "const": "100vw", - "type": "string" - } - ] - }, - "x": { - "description": "x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present this is equivalent to setting them to 0", - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["height", "id", "type", "width"], - "type": "object" - }, - "wireframeScreenshot": { - "additionalProperties": false, - "description": "a screenshot behaves exactly like an image, but it is expected to be a screenshot of the screen at the time of the event, when sent as a mutation it must always attached to the root of the playback, when sent as an initial snapshot it must be sent as the first or only snapshot so that it attaches to the body of the playback", - "properties": { - "base64": { - "description": "this will be used as base64 encoded image source, with no other attributes it is assumed to be a PNG, if omitted a placeholder is rendered", - "type": "string" - }, - "childWireframes": { - "items": { - "$ref": "#/definitions/wireframe" - }, - "type": "array" - }, - "height": { - "type": "number" - }, - "id": { - "type": "number" - }, - "style": { - "$ref": "#/definitions/MobileStyles" - }, - "type": { - "$ref": "#/definitions/MobileNodeType" - }, - "width": { - "anyOf": [ - { - "type": "number" - }, - { - "const": "100vw", - "type": "string" - } - ] - }, - "x": { - "description": "x and y are the top left corner 
of the element, if they are present then the element is absolutely positioned, if they are not present this is equivalent to setting them to 0", - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["height", "id", "type", "width"], - "type": "object" - }, - "wireframeSelect": { - "additionalProperties": false, - "properties": { - "childWireframes": { - "items": { - "$ref": "#/definitions/wireframe" - }, - "type": "array" - }, - "disabled": { - "description": "for several attributes we technically only care about true or absent as values. They are represented as bare attributes in HTML <input disabled>. When true that attribute is added to the HTML element, when absent that attribute is not added to the HTML element. When false or absent they are not added to the element.", - "type": "boolean" - }, - "height": { - "type": "number" - }, - "id": { - "type": "number" - }, - "inputType": { - "const": "select", - "type": "string" - }, - "options": { - "items": { - "type": "string" - }, - "type": "array" - }, - "style": { - "$ref": "#/definitions/MobileStyles" - }, - "type": { - "$ref": "#/definitions/MobileNodeType" - }, - "value": { - "type": "string" - }, - "width": { - "anyOf": [ - { - "type": "number" - }, - { - "const": "100vw", - "type": "string" - } - ] - }, - "x": { - "description": "x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present this is equivalent to setting them to 0", - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["disabled", "height", "id", "inputType", "type", "width"], - "type": "object" - }, - "wireframeStatusBar": { - "additionalProperties": false, - "description": "the status bar respects styling and positioning, but it is expected to be at the top of the screen with limited styling and no child elements", - "properties": { - "childWireframes": { - "items": { - "$ref": "#/definitions/wireframe" - }, - "type": "array" - }, - "height": { - "type": "number" - }, - "id": { - "type": "number" - }, - "style": { - "$ref": "#/definitions/MobileStyles" - }, - "type": { - "$ref": "#/definitions/MobileNodeType" - }, - "width": { - "anyOf": [ - { - "type": "number" - }, - { - "const": "100vw", - "type": "string" - } - ] - }, - "x": { - "description": "x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present this is equivalent to setting them to 0", - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["height", "id", "type", "width"], - "type": "object" - }, - "wireframeText": { - "additionalProperties": false, - "properties": { - "childWireframes": { - "items": { - "$ref": "#/definitions/wireframe" - }, - "type": "array" - }, - "height": { - "type": "number" - }, - "id": { - "type": "number" - }, - "style": { - "$ref": "#/definitions/MobileStyles" - }, - "text": { - "type": "string" - }, - "type": { - "$ref": "#/definitions/MobileNodeType" - }, - "width": { - "anyOf": [ - { - "type": "number" - }, - { - "const": "100vw", - "type": "string" - } - ] - }, - "x": { - "description": "x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present this is equivalent to setting them to 0", - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["height", "id", "text", "type", "width"], - "type": "object" - }, - "wireframeTextArea": { - "additionalProperties": false, - 
"properties": { - "childWireframes": { - "items": { - "$ref": "#/definitions/wireframe" - }, - "type": "array" - }, - "disabled": { - "description": "for several attributes we technically only care about true or absent as values. They are represented as bare attributes in HTML <input disabled>. When true that attribute is added to the HTML element, when absent that attribute is not added to the HTML element. When false or absent they are not added to the element.", - "type": "boolean" - }, - "height": { - "type": "number" - }, - "id": { - "type": "number" - }, - "inputType": { - "const": "text_area", - "type": "string" - }, - "style": { - "$ref": "#/definitions/MobileStyles" - }, - "type": { - "$ref": "#/definitions/MobileNodeType" - }, - "value": { - "type": "string" - }, - "width": { - "anyOf": [ - { - "type": "number" - }, - { - "const": "100vw", - "type": "string" - } - ] - }, - "x": { - "description": "x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present this is equivalent to setting them to 0", - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["disabled", "height", "id", "inputType", "type", "width"], - "type": "object" - }, - "wireframeToggle": { - "additionalProperties": false, - "properties": { - "checked": { - "type": "boolean" - }, - "childWireframes": { - "items": { - "$ref": "#/definitions/wireframe" - }, - "type": "array" - }, - "disabled": { - "description": "for several attributes we technically only care about true or absent as values. They are represented as bare attributes in HTML <input disabled>. When true that attribute is added to the HTML element, when absent that attribute is not added to the HTML element. When false or absent they are not added to the element.", - "type": "boolean" - }, - "height": { - "type": "number" - }, - "id": { - "type": "number" - }, - "inputType": { - "const": "toggle", - "type": "string" - }, - "label": { - "type": "string" - }, - "style": { - "$ref": "#/definitions/MobileStyles" - }, - "type": { - "$ref": "#/definitions/MobileNodeType" - }, - "width": { - "anyOf": [ - { - "type": "number" - }, - { - "const": "100vw", - "type": "string" - } - ] - }, - "x": { - "description": "x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present this is equivalent to setting them to 0", - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["checked", "disabled", "height", "id", "inputType", "type", "width"], - "type": "object" - }, - "wireframeWebView": { - "additionalProperties": false, - "properties": { - "childWireframes": { - "items": { - "$ref": "#/definitions/wireframe" - }, - "type": "array" - }, - "height": { - "type": "number" - }, - "id": { - "type": "number" - }, - "style": { - "$ref": "#/definitions/MobileStyles" - }, - "type": { - "$ref": "#/definitions/MobileNodeType" - }, - "url": { - "type": "string" - }, - "width": { - "anyOf": [ - { - "type": "number" - }, - { - "const": "100vw", - "type": "string" - } - ] - }, - "x": { - "description": "x and y are the top left corner of the element, if they are present then the element is absolutely positioned, if they are not present this is equivalent to setting them to 0", - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["height", "id", "type", "width"], - "type": "object" - } - } -} diff --git a/ee/frontend/mobile-replay/schema/web/rr-web-schema.json 
b/ee/frontend/mobile-replay/schema/web/rr-web-schema.json deleted file mode 100644 index 79102a23ef..0000000000 --- a/ee/frontend/mobile-replay/schema/web/rr-web-schema.json +++ /dev/null @@ -1,968 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "anyOf": [ - { - "additionalProperties": false, - "properties": { - "data": {}, - "delay": { - "type": "number" - }, - "timestamp": { - "type": "number" - }, - "type": { - "$ref": "#/definitions/EventType.DomContentLoaded" - } - }, - "required": ["data", "timestamp", "type"], - "type": "object" - }, - { - "additionalProperties": false, - "properties": { - "data": {}, - "delay": { - "type": "number" - }, - "timestamp": { - "type": "number" - }, - "type": { - "$ref": "#/definitions/EventType.Load" - } - }, - "required": ["data", "timestamp", "type"], - "type": "object" - }, - { - "additionalProperties": false, - "properties": { - "data": { - "additionalProperties": false, - "properties": { - "initialOffset": { - "additionalProperties": false, - "properties": { - "left": { - "type": "number" - }, - "top": { - "type": "number" - } - }, - "required": ["top", "left"], - "type": "object" - }, - "node": {} - }, - "required": ["node", "initialOffset"], - "type": "object" - }, - "delay": { - "type": "number" - }, - "timestamp": { - "type": "number" - }, - "type": { - "$ref": "#/definitions/EventType.FullSnapshot" - } - }, - "required": ["data", "timestamp", "type"], - "type": "object" - }, - { - "additionalProperties": false, - "properties": { - "data": { - "$ref": "#/definitions/incrementalData" - }, - "delay": { - "type": "number" - }, - "timestamp": { - "type": "number" - }, - "type": { - "$ref": "#/definitions/EventType.IncrementalSnapshot" - } - }, - "required": ["data", "timestamp", "type"], - "type": "object" - }, - { - "additionalProperties": false, - "properties": { - "data": { - "additionalProperties": false, - "properties": { - "height": { - "type": "number" - }, - "href": { - "type": "string" - }, - "width": { - "type": "number" - } - }, - "required": ["href", "width", "height"], - "type": "object" - }, - "delay": { - "type": "number" - }, - "timestamp": { - "type": "number" - }, - "type": { - "$ref": "#/definitions/EventType.Meta" - } - }, - "required": ["data", "timestamp", "type"], - "type": "object" - }, - { - "additionalProperties": false, - "properties": { - "data": { - "additionalProperties": false, - "properties": { - "payload": {}, - "tag": { - "type": "string" - } - }, - "required": ["tag", "payload"], - "type": "object" - }, - "delay": { - "type": "number" - }, - "timestamp": { - "type": "number" - }, - "type": { - "$ref": "#/definitions/EventType.Custom" - } - }, - "required": ["data", "timestamp", "type"], - "type": "object" - }, - { - "additionalProperties": false, - "properties": { - "data": { - "additionalProperties": false, - "properties": { - "payload": {}, - "plugin": { - "type": "string" - } - }, - "required": ["plugin", "payload"], - "type": "object" - }, - "delay": { - "type": "number" - }, - "timestamp": { - "type": "number" - }, - "type": { - "$ref": "#/definitions/EventType.Plugin" - } - }, - "required": ["data", "timestamp", "type"], - "type": "object" - } - ], - "definitions": { - "CanvasContext": { - "enum": [0, 1, 2], - "type": "number" - }, - "EventType.Custom": { - "const": 5, - "type": "number" - }, - "EventType.DomContentLoaded": { - "const": 0, - "type": "number" - }, - "EventType.FullSnapshot": { - "const": 2, - "type": "number" - }, - "EventType.IncrementalSnapshot": { - "const": 3, - 
"type": "number" - }, - "EventType.Load": { - "const": 1, - "type": "number" - }, - "EventType.Meta": { - "const": 4, - "type": "number" - }, - "EventType.Plugin": { - "const": 6, - "type": "number" - }, - "FontFaceDescriptors": { - "additionalProperties": false, - "properties": { - "display": { - "type": "string" - }, - "featureSettings": { - "type": "string" - }, - "stretch": { - "type": "string" - }, - "style": { - "type": "string" - }, - "unicodeRange": { - "type": "string" - }, - "variant": { - "type": "string" - }, - "weight": { - "type": "string" - } - }, - "type": "object" - }, - "IncrementalSource.AdoptedStyleSheet": { - "const": 15, - "type": "number" - }, - "IncrementalSource.CanvasMutation": { - "const": 9, - "type": "number" - }, - "IncrementalSource.CustomElement": { - "const": 16, - "type": "number" - }, - "IncrementalSource.Drag": { - "const": 12, - "type": "number" - }, - "IncrementalSource.Font": { - "const": 10, - "type": "number" - }, - "IncrementalSource.Input": { - "const": 5, - "type": "number" - }, - "IncrementalSource.MediaInteraction": { - "const": 7, - "type": "number" - }, - "IncrementalSource.MouseInteraction": { - "const": 2, - "type": "number" - }, - "IncrementalSource.MouseMove": { - "const": 1, - "type": "number" - }, - "IncrementalSource.Mutation": { - "const": 0, - "type": "number" - }, - "IncrementalSource.Scroll": { - "const": 3, - "type": "number" - }, - "IncrementalSource.Selection": { - "const": 14, - "type": "number" - }, - "IncrementalSource.StyleDeclaration": { - "const": 13, - "type": "number" - }, - "IncrementalSource.StyleSheetRule": { - "const": 8, - "type": "number" - }, - "IncrementalSource.TouchMove": { - "const": 6, - "type": "number" - }, - "IncrementalSource.ViewportResize": { - "const": 4, - "type": "number" - }, - "MediaInteractions": { - "enum": [0, 1, 2, 3, 4], - "type": "number" - }, - "MouseInteractions": { - "enum": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - "type": "number" - }, - "PointerTypes": { - "enum": [0, 1, 2], - "type": "number" - }, - "SelectionRange": { - "additionalProperties": false, - "properties": { - "end": { - "type": "number" - }, - "endOffset": { - "type": "number" - }, - "start": { - "type": "number" - }, - "startOffset": { - "type": "number" - } - }, - "required": ["start", "startOffset", "end", "endOffset"], - "type": "object" - }, - "addedNodeMutation": { - "additionalProperties": false, - "properties": { - "nextId": { - "type": ["number", "null"] - }, - "node": {}, - "parentId": { - "type": "number" - }, - "previousId": { - "type": ["number", "null"] - } - }, - "required": ["parentId", "nextId", "node"], - "type": "object" - }, - "adoptedStyleSheetData": { - "additionalProperties": false, - "properties": { - "id": { - "type": "number" - }, - "source": { - "$ref": "#/definitions/IncrementalSource.AdoptedStyleSheet" - }, - "styleIds": { - "items": { - "type": "number" - }, - "type": "array" - }, - "styles": { - "items": { - "additionalProperties": false, - "properties": { - "rules": { - "items": { - "$ref": "#/definitions/styleSheetAddRule" - }, - "type": "array" - }, - "styleId": { - "type": "number" - } - }, - "required": ["styleId", "rules"], - "type": "object" - }, - "type": "array" - } - }, - "required": ["id", "source", "styleIds"], - "type": "object" - }, - "attributeMutation": { - "additionalProperties": false, - "properties": { - "attributes": { - "additionalProperties": { - "anyOf": [ - { - "type": "string" - }, - { - "$ref": "#/definitions/styleOMValue" - }, - { - "type": "null" - } - ] - }, - "type": 
"object" - }, - "id": { - "type": "number" - } - }, - "required": ["id", "attributes"], - "type": "object" - }, - "canvasMutationCommand": { - "additionalProperties": false, - "properties": { - "args": { - "items": {}, - "type": "array" - }, - "property": { - "type": "string" - }, - "setter": { - "const": true, - "type": "boolean" - } - }, - "required": ["property", "args"], - "type": "object" - }, - "canvasMutationData": { - "anyOf": [ - { - "additionalProperties": false, - "properties": { - "commands": { - "items": { - "$ref": "#/definitions/canvasMutationCommand" - }, - "type": "array" - }, - "id": { - "type": "number" - }, - "source": { - "$ref": "#/definitions/IncrementalSource.CanvasMutation" - }, - "type": { - "$ref": "#/definitions/CanvasContext" - } - }, - "required": ["commands", "id", "source", "type"], - "type": "object" - }, - { - "additionalProperties": false, - "properties": { - "args": { - "items": {}, - "type": "array" - }, - "id": { - "type": "number" - }, - "property": { - "type": "string" - }, - "setter": { - "const": true, - "type": "boolean" - }, - "source": { - "$ref": "#/definitions/IncrementalSource.CanvasMutation" - }, - "type": { - "$ref": "#/definitions/CanvasContext" - } - }, - "required": ["args", "id", "property", "source", "type"], - "type": "object" - } - ] - }, - "customElementData": { - "additionalProperties": false, - "properties": { - "define": { - "additionalProperties": false, - "properties": { - "name": { - "type": "string" - } - }, - "required": ["name"], - "type": "object" - }, - "source": { - "$ref": "#/definitions/IncrementalSource.CustomElement" - } - }, - "required": ["source"], - "type": "object" - }, - "fontData": { - "additionalProperties": false, - "properties": { - "buffer": { - "type": "boolean" - }, - "descriptors": { - "$ref": "#/definitions/FontFaceDescriptors" - }, - "family": { - "type": "string" - }, - "fontSource": { - "type": "string" - }, - "source": { - "$ref": "#/definitions/IncrementalSource.Font" - } - }, - "required": ["buffer", "family", "fontSource", "source"], - "type": "object" - }, - "incrementalData": { - "anyOf": [ - { - "$ref": "#/definitions/mutationData" - }, - { - "$ref": "#/definitions/mousemoveData" - }, - { - "$ref": "#/definitions/mouseInteractionData" - }, - { - "$ref": "#/definitions/scrollData" - }, - { - "$ref": "#/definitions/viewportResizeData" - }, - { - "$ref": "#/definitions/inputData" - }, - { - "$ref": "#/definitions/mediaInteractionData" - }, - { - "$ref": "#/definitions/styleSheetRuleData" - }, - { - "$ref": "#/definitions/canvasMutationData" - }, - { - "$ref": "#/definitions/fontData" - }, - { - "$ref": "#/definitions/selectionData" - }, - { - "$ref": "#/definitions/styleDeclarationData" - }, - { - "$ref": "#/definitions/adoptedStyleSheetData" - }, - { - "$ref": "#/definitions/customElementData" - } - ] - }, - "inputData": { - "additionalProperties": false, - "properties": { - "id": { - "type": "number" - }, - "isChecked": { - "type": "boolean" - }, - "source": { - "$ref": "#/definitions/IncrementalSource.Input" - }, - "text": { - "type": "string" - }, - "userTriggered": { - "type": "boolean" - } - }, - "required": ["id", "isChecked", "source", "text"], - "type": "object" - }, - "mediaInteractionData": { - "additionalProperties": false, - "properties": { - "currentTime": { - "type": "number" - }, - "id": { - "type": "number" - }, - "loop": { - "type": "boolean" - }, - "muted": { - "type": "boolean" - }, - "playbackRate": { - "type": "number" - }, - "source": { - "$ref": 
"#/definitions/IncrementalSource.MediaInteraction" - }, - "type": { - "$ref": "#/definitions/MediaInteractions" - }, - "volume": { - "type": "number" - } - }, - "required": ["id", "source", "type"], - "type": "object" - }, - "mouseInteractionData": { - "additionalProperties": false, - "properties": { - "id": { - "type": "number" - }, - "pointerType": { - "$ref": "#/definitions/PointerTypes" - }, - "source": { - "$ref": "#/definitions/IncrementalSource.MouseInteraction" - }, - "type": { - "$ref": "#/definitions/MouseInteractions" - }, - "x": { - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["id", "source", "type"], - "type": "object" - }, - "mousePosition": { - "additionalProperties": false, - "properties": { - "id": { - "type": "number" - }, - "timeOffset": { - "type": "number" - }, - "x": { - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["x", "y", "id", "timeOffset"], - "type": "object" - }, - "mousemoveData": { - "additionalProperties": false, - "properties": { - "positions": { - "items": { - "$ref": "#/definitions/mousePosition" - }, - "type": "array" - }, - "source": { - "anyOf": [ - { - "$ref": "#/definitions/IncrementalSource.MouseMove" - }, - { - "$ref": "#/definitions/IncrementalSource.TouchMove" - }, - { - "$ref": "#/definitions/IncrementalSource.Drag" - } - ] - } - }, - "required": ["source", "positions"], - "type": "object" - }, - "mutationData": { - "additionalProperties": false, - "properties": { - "adds": { - "items": { - "$ref": "#/definitions/addedNodeMutation" - }, - "type": "array" - }, - "attributes": { - "items": { - "$ref": "#/definitions/attributeMutation" - }, - "type": "array" - }, - "isAttachIframe": { - "const": true, - "type": "boolean" - }, - "removes": { - "items": { - "$ref": "#/definitions/removedNodeMutation" - }, - "type": "array" - }, - "source": { - "$ref": "#/definitions/IncrementalSource.Mutation" - }, - "texts": { - "items": { - "$ref": "#/definitions/textMutation" - }, - "type": "array" - } - }, - "required": ["adds", "attributes", "removes", "source", "texts"], - "type": "object" - }, - "removedNodeMutation": { - "additionalProperties": false, - "properties": { - "id": { - "type": "number" - }, - "isShadow": { - "type": "boolean" - }, - "parentId": { - "type": "number" - } - }, - "required": ["parentId", "id"], - "type": "object" - }, - "scrollData": { - "additionalProperties": false, - "properties": { - "id": { - "type": "number" - }, - "source": { - "$ref": "#/definitions/IncrementalSource.Scroll" - }, - "x": { - "type": "number" - }, - "y": { - "type": "number" - } - }, - "required": ["id", "source", "x", "y"], - "type": "object" - }, - "selectionData": { - "additionalProperties": false, - "properties": { - "ranges": { - "items": { - "$ref": "#/definitions/SelectionRange" - }, - "type": "array" - }, - "source": { - "$ref": "#/definitions/IncrementalSource.Selection" - } - }, - "required": ["ranges", "source"], - "type": "object" - }, - "styleDeclarationData": { - "additionalProperties": false, - "properties": { - "id": { - "type": "number" - }, - "index": { - "items": { - "type": "number" - }, - "type": "array" - }, - "remove": { - "additionalProperties": false, - "properties": { - "property": { - "type": "string" - } - }, - "required": ["property"], - "type": "object" - }, - "set": { - "additionalProperties": false, - "properties": { - "priority": { - "type": "string" - }, - "property": { - "type": "string" - }, - "value": { - "type": ["string", "null"] - } - }, - "required": ["property", 
"value"], - "type": "object" - }, - "source": { - "$ref": "#/definitions/IncrementalSource.StyleDeclaration" - }, - "styleId": { - "type": "number" - } - }, - "required": ["index", "source"], - "type": "object" - }, - "styleOMValue": { - "additionalProperties": { - "anyOf": [ - { - "$ref": "#/definitions/styleValueWithPriority" - }, - { - "type": "string" - }, - { - "const": false, - "type": "boolean" - } - ] - }, - "type": "object" - }, - "styleSheetAddRule": { - "additionalProperties": false, - "properties": { - "index": { - "anyOf": [ - { - "type": "number" - }, - { - "items": { - "type": "number" - }, - "type": "array" - } - ] - }, - "rule": { - "type": "string" - } - }, - "required": ["rule"], - "type": "object" - }, - "styleSheetDeleteRule": { - "additionalProperties": false, - "properties": { - "index": { - "anyOf": [ - { - "type": "number" - }, - { - "items": { - "type": "number" - }, - "type": "array" - } - ] - } - }, - "required": ["index"], - "type": "object" - }, - "styleSheetRuleData": { - "additionalProperties": false, - "properties": { - "adds": { - "items": { - "$ref": "#/definitions/styleSheetAddRule" - }, - "type": "array" - }, - "id": { - "type": "number" - }, - "removes": { - "items": { - "$ref": "#/definitions/styleSheetDeleteRule" - }, - "type": "array" - }, - "replace": { - "type": "string" - }, - "replaceSync": { - "type": "string" - }, - "source": { - "$ref": "#/definitions/IncrementalSource.StyleSheetRule" - }, - "styleId": { - "type": "number" - } - }, - "required": ["source"], - "type": "object" - }, - "styleValueWithPriority": { - "items": { - "type": "string" - }, - "maxItems": 2, - "minItems": 2, - "type": "array" - }, - "textMutation": { - "additionalProperties": false, - "properties": { - "id": { - "type": "number" - }, - "value": { - "type": ["string", "null"] - } - }, - "required": ["id", "value"], - "type": "object" - }, - "viewportResizeData": { - "additionalProperties": false, - "properties": { - "height": { - "type": "number" - }, - "source": { - "$ref": "#/definitions/IncrementalSource.ViewportResize" - }, - "width": { - "type": "number" - } - }, - "required": ["height", "source", "width"], - "type": "object" - } - } -} diff --git a/ee/frontend/mobile-replay/transform.test.ts b/ee/frontend/mobile-replay/transform.test.ts deleted file mode 100644 index 77c5316d5a..0000000000 --- a/ee/frontend/mobile-replay/transform.test.ts +++ /dev/null @@ -1,1235 +0,0 @@ -import posthogEE from '@posthog/ee/exports' -import { EventType } from '@rrweb/types' -import { ifEeDescribe } from 'lib/ee.test' - -import { PostHogEE } from '../../../frontend/@posthog/ee/types' -import * as incrementalSnapshotJson from './__mocks__/increment-with-child-duplication.json' -import { validateAgainstWebSchema, validateFromMobile } from './index' -import { wireframe } from './mobile.types' -import { stripBarsFromWireframes } from './transformer/transformers' - -const unspecifiedBase64ImageURL = - 
'iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII=' - -const heartEyesEmojiURL = 'data:image/png;base64,' + unspecifiedBase64ImageURL - -function fakeWireframe(type: string, children?: wireframe[]): wireframe { - // this is a fake so we can force the type - return { type, childWireframes: children || [] } as Partial<wireframe> as wireframe -} - -describe('replay/transform', () => { - describe('validation', () => { - test('example of validating incoming _invalid_ data', () => { - const invalidData = { - foo: 'abc', - bar: 'abc', - } - - expect(validateFromMobile(invalidData).isValid).toBe(false) - }) - - test('example of validating mobile meta event', () => { - const validData = { - data: { width: 1, height: 1 }, - timestamp: 1, - type: EventType.Meta, - } - - expect(validateFromMobile(validData)).toStrictEqual({ - isValid: true, - errors: null, - }) - }) - - describe('validate web schema', () => { - test('does not block when invalid', () => { - expect(validateAgainstWebSchema({})).toBeFalsy() - }) - - test('should be valid when...', () => { - expect(validateAgainstWebSchema({ data: {}, timestamp: 12345, type: 0 })).toBeTruthy() - }) - }) - }) - - ifEeDescribe('transform', () => { - let posthogEEModule: PostHogEE - beforeEach(async () => { - posthogEEModule = await posthogEE() - }) - - test('can process top level screenshot', () => { - expect( - posthogEEModule.mobileReplay?.transformToWeb([ - { - data: { width: 300, height: 600 }, - timestamp: 1, - type: 4, - }, - { - windowId: '5173a13e-abac-4def-b227-2f81dc2808b6', - data: { - wireframes: [ - { - base64: 'image-content', - height: 914, - id: 151700670, - style: { - backgroundColor: '#F3EFF7', - }, - type: 'screenshot', - width: 411, - x: 0, - y: 0, - }, - ], - }, - timestamp: 1714397321578, - type: 2, - }, - ]) - ).toMatchSnapshot() - }) - - test('can process screenshot mutation', () => { - expect( - posthogEEModule.mobileReplay?.transformToWeb([ - { - data: { width: 300, height: 600 }, - timestamp: 1, - type: 4, - }, - { - windowId: '5173a13e-abac-4def-b227-2f81dc2808b6', - data: { - source: 0, - updates: [ - { - wireframe: { - base64: 'mutated-image-content', - height: 914, - id: 151700670, - style: { - backgroundColor: '#F3EFF7', - }, - type: 'screenshot', - width: 411, - x: 0, - y: 0, - }, - }, - ], - }, - timestamp: 
1714397336836, - type: 3, - seen: 3551987272322930, - }, - ]) - ).toMatchSnapshot() - }) - - test('can process unknown types without error', () => { - expect( - posthogEEModule.mobileReplay?.transformToWeb([ - { - data: { width: 300, height: 600 }, - timestamp: 1, - type: 4, - }, - { - data: { href: 'included when present', width: 300, height: 600 }, - timestamp: 1, - type: 4, - }, - { type: 9999 }, - { - type: 2, - data: { - wireframes: [ - { - id: 12345, - x: 25, - y: 42, - width: 100, - height: 30, - type: 'image', - }, - ], - }, - timestamp: 1, - }, - ]) - ).toMatchSnapshot() - }) - - test('can ignore unknown wireframe types', () => { - const unexpectedWireframeType = posthogEEModule.mobileReplay?.transformToWeb([ - { - data: { screen: 'App Home Page', width: 300, height: 600 }, - timestamp: 1, - type: 4, - }, - { - type: 2, - data: { - wireframes: [ - { - id: 12345, - x: 11, - y: 12, - width: 100, - height: 30, - type: 'something in the SDK but not yet the transformer', - }, - ], - }, - timestamp: 1, - }, - ]) - expect(unexpectedWireframeType).toMatchSnapshot() - }) - - test('can short-circuit non-mobile full snapshot', () => { - const allWeb = posthogEEModule.mobileReplay?.transformToWeb([ - { - data: { href: 'https://my-awesome.site', width: 300, height: 600 }, - timestamp: 1, - type: 4, - }, - { - type: 2, - data: { - node: { the: 'payload' }, - }, - timestamp: 1, - }, - ]) - expect(allWeb).toMatchSnapshot() - }) - - test('can convert images', () => { - const exampleWithImage = posthogEEModule.mobileReplay?.transformToWeb([ - { - data: { - screen: 'App Home Page', - width: 300, - height: 600, - }, - timestamp: 1, - type: 4, - }, - { - type: 2, - data: { - wireframes: [ - { - id: 12345, - x: 11, - y: 12, - width: 100, - height: 30, - // clip: { - // bottom: 83, - // right: 44, - // }, - type: 'text', - text: 'β°Ÿι²δ©žγ‘›μ“―μž˜αŒ«δ΅€γ₯¦ι·λžιˆ…ζ―…β”ŒλΉ―ζΉŒα²—', - style: { - // family: '疴ꖻ䖭㋑⁃⻋ꑧٹ㧕Ⓖ', - // size: 4220431756569966319, - color: '#ffffff', - }, - }, - { - id: 12345, - x: 25, - y: 42, - width: 100, - height: 30, - // clip: { - // bottom: 83, - // right: 44, - // }, - type: 'image', - base64: heartEyesEmojiURL, - }, - ], - }, - timestamp: 1, - }, - ]) - expect(exampleWithImage).toMatchSnapshot() - }) - - test('can convert rect with text', () => { - const exampleWithRectAndText = posthogEEModule.mobileReplay?.transformToWeb([ - { - data: { - width: 300, - height: 600, - }, - timestamp: 1, - type: 4, - }, - { - type: 2, - data: { - wireframes: [ - { - id: 12345, - x: 11, - y: 12, - width: 100, - height: 30, - type: 'rectangle', - style: { - color: '#ee3ee4', - borderColor: '#ee3ee4', - borderWidth: '4', - borderRadius: '10px', - }, - }, - { - id: 12345, - x: 13, - y: 17, - width: 100, - height: 30, - verticalAlign: 'top', - horizontalAlign: 'right', - type: 'text', - text: 'i am in the box', - fontSize: '12px', - fontFamily: 'sans-serif', - }, - ], - }, - timestamp: 1, - }, - ]) - expect(exampleWithRectAndText).toMatchSnapshot() - }) - - test('child wireframes are processed', () => { - const textEvent = posthogEEModule.mobileReplay?.transformToWeb([ - { - data: { screen: 'App Home Page', width: 300, height: 600 }, - timestamp: 1, - type: 4, - }, - { - type: 2, - data: { - wireframes: [ - { - id: 123456789, - childWireframes: [ - { - id: 98765, - childWireframes: [ - { - id: 12345, - x: 11, - y: 12, - width: 100, - height: 30, - type: 'text', - text: 'first nested', - style: { - color: '#ffffff', - backgroundColor: '#000000', - borderWidth: '4px', - borderColor: '#000ddd', - 
borderRadius: '10px', - }, - }, - { - id: 12345, - x: 11, - y: 12, - width: 100, - height: 30, - type: 'text', - text: 'second nested', - style: { - color: '#ffffff', - backgroundColor: '#000000', - borderWidth: '4px', - borderColor: '#000ddd', - borderRadius: '10px', - }, - }, - ], - }, - { - id: 12345, - x: 11, - y: 12, - width: 100, - height: 30, - // clip: { - // bottom: 83, - // right: 44, - // }, - type: 'text', - text: 'third (different level) nested', - style: { - // family: '疴ꖻ䖭㋑⁃⻋ꑧٹ㧕Ⓖ', - // size: 4220431756569966319, - color: '#ffffff', - backgroundColor: '#000000', - borderWidth: '4px', - borderColor: '#000ddd', - borderRadius: '10', // you can omit the pixels - }, - }, - ], - }, - ], - }, - timestamp: 1, - }, - ]) - expect(textEvent).toMatchSnapshot() - }) - - test('respect incremental ids, replace with body otherwise', () => { - const textEvent = posthogEEModule.mobileReplay?.transformToWeb([ - { - windowId: 'ddc9c89d-2272-4b07-a280-c00db3a9182f', - data: { - id: 0, // must be an element id - replace with body - pointerType: 2, - source: 2, - type: 7, - x: 523, - y: 683, - }, - timestamp: 1701355473313, - type: 3, - delay: 2160, - }, - { - windowId: 'ddc9c89d-2272-4b07-a280-c00db3a9182f', - data: { - id: 145, // element provided - respected without validation - pointerType: 2, - source: 2, - type: 7, - x: 523, - y: 683, - }, - timestamp: 1701355473313, - type: 3, - delay: 2160, - }, - ]) - expect(textEvent).toMatchSnapshot() - }) - - test('incremental mutations de-duplicate the tree', () => { - const conversion = posthogEEModule.mobileReplay?.transformEventToWeb(incrementalSnapshotJson) - expect(conversion).toMatchSnapshot() - }) - - test('omitting x and y is equivalent to setting them to 0', () => { - expect( - posthogEEModule.mobileReplay?.transformToWeb([ - { - type: 2, - data: { - wireframes: [ - { - id: 12345, - width: 100, - height: 30, - type: 'image', - }, - ], - }, - timestamp: 1, - }, - ]) - ).toMatchSnapshot() - }) - - test('can convert status bar', () => { - const converted = posthogEEModule.mobileReplay?.transformToWeb([ - { - data: { - width: 300, - height: 600, - }, - timestamp: 1, - type: 4, - }, - { - type: 2, - data: { - wireframes: [ - { - id: 12345, - type: 'status_bar', - // _we'll process the x and y, but they should always be 0 - x: 0, - y: 0, - // we'll process the width - // width: 100, - height: 30, - style: { - // we can't expect to receive all of these values, - // but we'll handle them, because that's easier than not doing - color: '#ee3ee4', - borderColor: '#ee3ee4', - borderWidth: '4', - borderRadius: '10px', - backgroundColor: '#000000', - }, - }, - { - id: 12345, - type: 'status_bar', - x: 13, - y: 17, - width: 100, - // zero height is respected - height: 0, - // as with styling we don't expect to receive these values, - // but we'll respect them if they are present - horizontalAlign: 'right', - verticalAlign: 'top', - fontSize: '12px', - fontFamily: 'sans-serif', - }, - ], - }, - timestamp: 1, - }, - ]) - expect(converted).toMatchSnapshot() - }) - - test('can convert navigation bar', () => { - const converted = posthogEEModule.mobileReplay?.transformToWeb([ - { - data: { - width: 300, - height: 600, - }, - timestamp: 1, - type: 4, - }, - { - type: 2, - data: { - wireframes: [ - { - id: 12345, - type: 'navigation_bar', - // we respect x and y but expect this to be at the bottom of the screen - x: 11, - y: 12, - // we respect width but expect it to be fullscreen - width: 100, - height: 30, - // as with status bar, we don't expect to receive all 
of these values, - // but we'll respect them if they are present - style: { - color: '#ee3ee4', - borderColor: '#ee3ee4', - borderWidth: '4', - borderRadius: '10px', - }, - }, - ], - }, - timestamp: 1, - }, - ]) - expect(converted).toMatchSnapshot() - }) - - test('can convert invalid text wireframe', () => { - const converted = posthogEEModule.mobileReplay?.transformToWeb([ - { - data: { - width: 300, - height: 600, - }, - timestamp: 1, - type: 4, - }, - { - type: 2, - data: { - wireframes: [ - { - id: 12345, - type: 'text', - x: 11, - y: 12, - width: 100, - height: 30, - style: { - color: '#ee3ee4', - borderColor: '#ee3ee4', - borderWidth: '4', - borderRadius: '10px', - }, - // text property is missing - }, - ], - }, - timestamp: 1, - }, - ]) - expect(converted).toMatchSnapshot() - }) - - test('can set background image to base64 png', () => { - const converted = posthogEEModule.mobileReplay?.transformToWeb([ - { - data: { - width: 300, - height: 600, - }, - timestamp: 1, - type: 4, - }, - { - type: 2, - data: { - wireframes: [ - { - id: 12345, - type: 'div', - x: 0, - y: 0, - height: 30, - style: { backgroundImage: heartEyesEmojiURL }, - }, - { - id: 12346, - type: 'div', - x: 0, - y: 0, - height: 30, - style: { backgroundImage: unspecifiedBase64ImageURL }, - }, - { - id: 12346, - type: 'div', - x: 0, - y: 0, - height: 30, - style: { backgroundImage: unspecifiedBase64ImageURL, backgroundSize: 'cover' }, - }, - { - id: 12346, - type: 'div', - x: 0, - y: 0, - height: 30, - // should be ignored - style: { backgroundImage: null }, - }, - ], - }, - timestamp: 1, - }, - ]) - expect(converted).toMatchSnapshot() - }) - - describe('inputs', () => { - test('input gets 0 padding by default but can be overridden', () => { - expect( - posthogEEModule.mobileReplay?.transformEventToWeb({ - type: 2, - data: { - wireframes: [ - { - id: 12359, - width: 100, - height: 30, - type: 'input', - inputType: 'text', - }, - { - id: 12361, - width: 100, - height: 30, - type: 'input', - inputType: 'text', - style: { - paddingLeft: '16px', - paddingRight: 16, - }, - }, - ], - }, - timestamp: 1, - }) - ).toMatchSnapshot() - }) - - test('buttons with nested elements', () => { - expect( - posthogEEModule.mobileReplay?.transformEventToWeb({ - type: 2, - data: { - wireframes: [ - { - id: 12359, - width: 100, - height: 30, - type: 'input', - inputType: 'button', - childNodes: [ - { - id: 12360, - width: 100, - height: 30, - type: 'text', - text: 'click me', - }, - ], - }, - { - id: 12361, - width: 100, - height: 30, - type: 'input', - inputType: 'button', - value: 'click me', - childNodes: [ - { - id: 12362, - width: 100, - height: 30, - type: 'text', - text: 'and have more text', - }, - ], - }, - ], - }, - timestamp: 1, - }) - ).toMatchSnapshot() - }) - test('wrapping with labels', () => { - expect( - posthogEEModule.mobileReplay?.transformEventToWeb({ - type: 2, - data: { - wireframes: [ - { - id: 12359, - width: 100, - height: 30, - type: 'input', - inputType: 'checkbox', - label: 'i will wrap the checkbox', - }, - ], - }, - timestamp: 1, - }) - ).toMatchSnapshot() - }) - - test('web_view with URL', () => { - expect( - posthogEEModule.mobileReplay?.transformEventToWeb({ - type: 2, - data: { - wireframes: [ - { - id: 12365, - width: 100, - height: 30, - type: 'web_view', - url: 'https://example.com', - }, - ], - }, - timestamp: 1, - }) - ).toMatchSnapshot() - }) - - test('progress rating', () => { - expect( - posthogEEModule.mobileReplay?.transformEventToWeb({ - type: 2, - data: { - wireframes: [ - { - id: 12365, - width: 
100, - height: 30, - type: 'input', - inputType: 'progress', - style: { bar: 'rating' }, - max: '12', - value: '6.5', - }, - ], - }, - timestamp: 1, - }) - ).toMatchSnapshot() - }) - - test('open keyboard custom event', () => { - expect( - posthogEEModule.mobileReplay?.transformEventToWeb({ - timestamp: 1, - type: EventType.Custom, - data: { tag: 'keyboard', payload: { open: true, height: 150 } }, - }) - ).toMatchSnapshot() - }) - - test('isolated add mutation', () => { - expect( - posthogEEModule.mobileReplay?.transformEventToWeb({ - timestamp: 1, - type: EventType.IncrementalSnapshot, - data: { - source: 0, - adds: [ - { - parentId: 54321, - wireframe: { - id: 12365, - width: 100, - height: 30, - type: 'input', - inputType: 'progress', - style: { bar: 'rating' }, - max: '12', - value: '6.5', - }, - }, - ], - }, - }) - ).toMatchSnapshot() - }) - - test('isolated remove mutation', () => { - expect( - posthogEEModule.mobileReplay?.transformEventToWeb({ - timestamp: 1, - type: EventType.IncrementalSnapshot, - data: { - source: 0, - removes: [{ parentId: 54321, id: 12345 }], - }, - }) - ).toMatchSnapshot() - }) - - test('isolated update mutation', () => { - expect( - posthogEEModule.mobileReplay?.transformEventToWeb({ - timestamp: 1, - type: EventType.IncrementalSnapshot, - data: { - source: 0, - texts: [], - attributes: [], - updates: [ - { - parentId: 54321, - wireframe: { - id: 12365, - width: 100, - height: 30, - type: 'input', - inputType: 'progress', - style: { bar: 'rating' }, - max: '12', - value: '6.5', - }, - }, - ], - }, - }) - ).toMatchSnapshot() - }) - - test('closed keyboard custom event', () => { - expect( - posthogEEModule.mobileReplay?.transformEventToWeb({ - timestamp: 1, - type: EventType.Custom, - data: { tag: 'keyboard', payload: { open: false } }, - }) - ).toMatchSnapshot() - }) - - test('radio_group', () => { - expect( - posthogEEModule.mobileReplay?.transformEventToWeb({ - type: 2, - data: { - wireframes: [ - { - id: 54321, - width: 100, - height: 30, - type: 'radio_group', - timestamp: 12345, - childNodes: [ - { - id: 12345, - width: 100, - height: 30, - type: 'input', - inputType: 'radio', - checked: true, - label: 'first', - }, - { - id: 12346, - width: 100, - height: 30, - type: 'input', - inputType: 'radio', - checked: false, - label: 'second', - }, - { - id: 12347, - width: 100, - height: 30, - type: 'text', - text: 'to check that only radios are given a name', - }, - ], - }, - ], - }, - timestamp: 1, - }) - ).toMatchSnapshot() - }) - test.each([ - { - id: 12346, - width: 100, - height: 30, - type: 'input', - inputType: 'text', - value: 'hello', - }, - { - id: 12347, - width: 100, - height: 30, - type: 'input', - inputType: 'text', - // without value - }, - { - id: 12348, - width: 100, - height: 30, - type: 'input', - inputType: 'password', - // without value - }, - { - id: 12349, - width: 100, - height: 30, - type: 'input', - inputType: 'email', - // without value - }, - { - id: 12350, - width: 100, - height: 30, - type: 'input', - inputType: 'number', - // without value - }, - { - id: 12351, - width: 100, - height: 30, - type: 'input', - inputType: 'search', - // without value - }, - { - id: 12352, - width: 100, - height: 30, - type: 'input', - inputType: 'tel', - disabled: true, - }, - { - id: 12352, - width: 100, - height: 30, - type: 'input', - inputType: 'url', - value: 'https://example.io', - disabled: false, - }, - { - id: 123123, - width: 100, - height: 30, - type: 'radio_group', - // oh, oh, no child nodes - }, - { - id: 12354, - width: 100, - height: 
30, - type: 'radio group', - childNodes: [ - { - id: 12355, - width: 100, - height: 30, - type: 'input', - inputType: 'radio', - checked: true, - label: 'first', - }, - { - id: 12356, - width: 100, - height: 30, - type: 'input', - inputType: 'radio', - checked: false, - label: 'second', - }, - ], - }, - { - id: 12357, - width: 100, - height: 30, - type: 'input', - inputType: 'checkbox', - checked: true, - label: 'first', - }, - { - id: 12357, - width: 100, - height: 30, - type: 'input', - inputType: 'checkbox', - checked: false, - label: 'second', - }, - { - id: 12357, - width: 100, - height: 30, - type: 'input', - inputType: 'checkbox', - checked: true, - disabled: true, - label: 'third', - }, - { - id: 12357, - width: 100, - height: 30, - type: 'input', - inputType: 'checkbox', - checked: true, - disabled: false, - // no label - }, - { - id: 12357, - width: 100, - height: 30, - type: 'input', - inputType: 'toggle', - checked: true, - label: 'first', - }, - { - id: 12357, - width: 100, - height: 30, - type: 'input', - inputType: 'toggle', - checked: false, - label: 'second', - }, - { - id: 12357, - width: 100, - height: 30, - type: 'input', - inputType: 'toggle', - checked: true, - disabled: true, - label: 'third', - }, - { - id: 12357, - width: 100, - height: 30, - type: 'input', - inputType: 'toggle', - checked: true, - disabled: false, - // no label - }, - { - id: 12358, - width: 100, - height: 30, - type: 'input', - inputType: 'button', - value: 'click me', - }, - { - id: 12363, - width: 100, - height: 30, - type: 'input', - inputType: 'textArea', - value: 'hello', - }, - { - id: 12364, - width: 100, - height: 30, - type: 'input', - inputType: 'textArea', - }, - { - id: 12365, - width: 100, - height: 30, - type: 'input', - inputType: 'select', - value: 'hello', - options: ['hello', 'world'], - }, - { - id: 12365, - width: 100, - height: 30, - type: 'input', - // missing input type - should be ignored - // inputType: 'select', - value: 'hello', - options: ['hello', 'world'], - }, - { - id: 12365, - width: 100, - height: 30, - type: 'input', - inputType: 'progress', - style: { bar: 'circular' }, - }, - { - id: 12365, - width: 100, - height: 30, - type: 'input', - inputType: 'progress', - style: { bar: 'horizontal' }, - }, - { - id: 12365, - width: 100, - height: 30, - type: 'input', - inputType: 'progress', - style: { bar: 'horizontal' }, - value: 0.75, - }, - { - id: 12365, - width: 100, - height: 30, - type: 'input', - inputType: 'progress', - style: { bar: 'horizontal' }, - value: 0.75, - max: 2.5, - }, - { - id: 12365, - width: 100, - height: 30, - type: 'placeholder', - label: 'hello', - }, - { - id: 12365, - width: 100, - height: 30, - type: 'web_view', - }, - ])('$type - $inputType - $value', (testCase) => { - expect( - posthogEEModule.mobileReplay?.transformEventToWeb({ - type: 2, - data: { - wireframes: [testCase], - }, - timestamp: 1, - }) - ).toMatchSnapshot() - }) - }) - }) - - describe('separate status and navbar from other wireframes', () => { - it('no-op', () => { - expect(stripBarsFromWireframes([])).toEqual({ - appNodes: [], - statusBar: undefined, - navigationBar: undefined, - }) - }) - - it('top-level status-bar', () => { - const statusBar = fakeWireframe('status_bar') - expect(stripBarsFromWireframes([statusBar])).toEqual({ appNodes: [], statusBar, navigationBar: undefined }) - }) - - it('top-level nav-bar', () => { - const navBar = fakeWireframe('navigation_bar') - expect(stripBarsFromWireframes([navBar])).toEqual({ - appNodes: [], - statusBar: undefined, - 
navigationBar: navBar, - }) - }) - - it('nested nav-bar', () => { - const navBar = fakeWireframe('navigation_bar') - const sourceWithNavBar = [ - fakeWireframe('div', [fakeWireframe('div'), fakeWireframe('div', [navBar, fakeWireframe('div')])]), - ] - expect(stripBarsFromWireframes(sourceWithNavBar)).toEqual({ - appNodes: [fakeWireframe('div', [fakeWireframe('div'), fakeWireframe('div', [fakeWireframe('div')])])], - statusBar: undefined, - navigationBar: navBar, - }) - }) - }) -}) diff --git a/ee/frontend/mobile-replay/transformer/colors.ts b/ee/frontend/mobile-replay/transformer/colors.ts deleted file mode 100644 index 56a54b23d7..0000000000 --- a/ee/frontend/mobile-replay/transformer/colors.ts +++ /dev/null @@ -1,51 +0,0 @@ -// from https://gist.github.com/t1grok/a0f6d04db569890bcb57 - -interface rgb { - r: number - g: number - b: number -} -interface yuv { - y: number - u: number - v: number -} - -function hexToRgb(hexColor: string): rgb | null { - const shorthandRegex = /^#?([a-f\d])([a-f\d])([a-f\d])$/i - hexColor = hexColor.replace(shorthandRegex, function (_, r, g, b) { - return r + r + g + g + b + b - }) - - const result = /^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i.exec(hexColor) - return result - ? { - r: parseInt(result[1], 16), - g: parseInt(result[2], 16), - b: parseInt(result[3], 16), - } - : null -} - -function rgbToYuv(rgbColor: rgb): yuv { - let y, u, v - - y = rgbColor.r * 0.299 + rgbColor.g * 0.587 + rgbColor.b * 0.114 - u = rgbColor.r * -0.168736 + rgbColor.g * -0.331264 + rgbColor.b * 0.5 + 128 - v = rgbColor.r * 0.5 + rgbColor.g * -0.418688 + rgbColor.b * -0.081312 + 128 - - y = Math.floor(y) - u = Math.floor(u) - v = Math.floor(v) - - return { y: y, u: u, v: v } -} - -export const isLight = (hexColor: string): boolean => { - const rgbColor = hexToRgb(hexColor) - if (!rgbColor) { - return false - } - const yuvColor = rgbToYuv(rgbColor) - return yuvColor.y > 128 -} diff --git a/ee/frontend/mobile-replay/transformer/screen-chrome.ts b/ee/frontend/mobile-replay/transformer/screen-chrome.ts deleted file mode 100644 index 16274ca853..0000000000 --- a/ee/frontend/mobile-replay/transformer/screen-chrome.ts +++ /dev/null @@ -1,178 +0,0 @@ -import { - keyboardEvent, - NodeType, - serializedNodeWithId, - wireframeNavigationBar, - wireframeStatusBar, -} from '../mobile.types' -import { isLight } from './colors' -import { - _isPositiveInteger, - BACKGROUND, - KEYBOARD_ID, - makePlaceholderElement, - NAVIGATION_BAR_ID, - STATUS_BAR_ID, -} from './transformers' -import { ConversionContext, ConversionResult } from './types' -import { asStyleString, makeStylesString } from './wireframeStyle' - -export let navigationBackgroundColor: string | undefined = undefined -export let navigationColor: string | undefined = undefined - -function spacerDiv(idSequence: Generator<number>): serializedNodeWithId { - const spacerId = idSequence.next().value - return { - type: NodeType.Element, - tagName: 'div', - attributes: { - style: 'width: 5px;', - 'data-rrweb-id': spacerId, - }, - id: spacerId, - childNodes: [], - } -} - -function makeFakeNavButton(icon: string, context: ConversionContext): serializedNodeWithId { - return { - type: NodeType.Element, - tagName: 'div', - attributes: {}, - id: context.idSequence.next().value, - childNodes: [ - { - type: NodeType.Text, - textContent: icon, - id: context.idSequence.next().value, - }, - ], - } -} - -export function makeNavigationBar( - wireframe: wireframeNavigationBar, - _children: serializedNodeWithId[], - context: ConversionContext -): 
ConversionResult<serializedNodeWithId> | null { - const _id = wireframe.id || NAVIGATION_BAR_ID - - const backArrowTriangle = makeFakeNavButton('β—€', context) - const homeCircle = makeFakeNavButton('βšͺ', context) - const screenButton = makeFakeNavButton('⬜️', context) - - navigationBackgroundColor = wireframe.style?.backgroundColor - navigationColor = isLight(navigationBackgroundColor || BACKGROUND) ? 'black' : 'white' - - return { - result: { - type: NodeType.Element, - tagName: 'div', - attributes: { - style: asStyleString([ - makeStylesString(wireframe), - 'display:flex', - 'flex-direction:row', - 'align-items:center', - 'justify-content:space-around', - 'color:' + navigationColor, - ]), - 'data-rrweb-id': _id, - }, - id: _id, - childNodes: [backArrowTriangle, homeCircle, screenButton], - }, - context, - } -} - -/** - * tricky: we need to accept children because that's the interface of converters, but we don't use them - */ -export function makeStatusBar( - wireframe: wireframeStatusBar, - _children: serializedNodeWithId[], - context: ConversionContext -): ConversionResult<serializedNodeWithId> { - const clockId = context.idSequence.next().value - // convert the wireframe timestamp to a date time, then get just the hour and minute of the time from that - const clockTime = context.timestamp - ? new Date(context.timestamp).toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' }) - : '' - - const clockFontColor = isLight(wireframe.style?.backgroundColor || '#ffffff') ? 'black' : 'white' - - const clock: serializedNodeWithId = { - type: NodeType.Element, - tagName: 'div', - attributes: { - 'data-rrweb-id': clockId, - }, - id: clockId, - childNodes: [ - { - type: NodeType.Text, - textContent: clockTime, - id: context.idSequence.next().value, - }, - ], - } - - return { - result: { - type: NodeType.Element, - tagName: 'div', - attributes: { - style: asStyleString([ - makeStylesString(wireframe, { color: clockFontColor }), - 'display:flex', - 'flex-direction:row', - 'align-items:center', - ]), - 'data-rrweb-id': STATUS_BAR_ID, - }, - id: STATUS_BAR_ID, - childNodes: [spacerDiv(context.idSequence), clock], - }, - context, - } -} - -export function makeOpenKeyboardPlaceholder( - mobileCustomEvent: keyboardEvent & { - timestamp: number - delay?: number - }, - context: ConversionContext -): ConversionResult<serializedNodeWithId> | null { - if (!mobileCustomEvent.data.payload.open) { - return null - } - - const shouldAbsolutelyPosition = - _isPositiveInteger(mobileCustomEvent.data.payload.x) || _isPositiveInteger(mobileCustomEvent.data.payload.y) - - return makePlaceholderElement( - { - id: KEYBOARD_ID, - type: 'placeholder', - label: 'keyboard', - height: mobileCustomEvent.data.payload.height, - width: _isPositiveInteger(mobileCustomEvent.data.payload.width) - ? mobileCustomEvent.data.payload.width - : '100vw', - style: { - backgroundColor: navigationBackgroundColor, - color: navigationBackgroundColor ? navigationColor : undefined, - }, - }, - [], - { - timestamp: context.timestamp, - idSequence: context.idSequence, - styleOverride: { - ...(shouldAbsolutelyPosition ? 
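
For reference, the screen-chrome helpers above pick black or white text by running the wireframe's background colour through `isLight`, which floors a YUV-style luma and compares it with 128. A standalone worked example of that computation, using the same weights as `rgbToYuv` above (the two hex values correspond to the transformer's default background and foreground colours):

```ts
// Standalone sketch of the luma check behind isLight(); values are approximate.
const luma = ({ r, g, b }: { r: number; g: number; b: number }): number =>
    Math.floor(r * 0.299 + g * 0.587 + b * 0.114)

luma({ r: 0xf3, g: 0xf4, b: 0xef }) // 243 -> greater than 128, treated as light, so chrome text renders black
luma({ r: 0x35, g: 0x37, b: 0x3e }) // 55  -> 128 or below, treated as dark, so chrome text renders white
```
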
{} : { bottom: true }), - }, - } - ) -} diff --git a/ee/frontend/mobile-replay/transformer/transformers.ts b/ee/frontend/mobile-replay/transformer/transformers.ts deleted file mode 100644 index c209a6f58c..0000000000 --- a/ee/frontend/mobile-replay/transformer/transformers.ts +++ /dev/null @@ -1,1412 +0,0 @@ -import { - addedNodeMutation, - customEvent, - EventType, - fullSnapshotEvent, - incrementalSnapshotEvent, - IncrementalSource, - metaEvent, - mutationData, - removedNodeMutation, -} from '@rrweb/types' -import { captureMessage } from '@sentry/react' -import { isObject } from 'lib/utils' -import { PLACEHOLDER_SVG_DATA_IMAGE_URL } from 'scenes/session-recordings/player/rrweb' - -import { - attributes, - documentNode, - elementNode, - fullSnapshotEvent as MobileFullSnapshotEvent, - keyboardEvent, - metaEvent as MobileMetaEvent, - MobileIncrementalSnapshotEvent, - MobileNodeMutation, - MobileNodeType, - NodeType, - serializedNodeWithId, - textNode, - wireframe, - wireframeButton, - wireframeCheckBox, - wireframeDiv, - wireframeImage, - wireframeInputComponent, - wireframeNavigationBar, - wireframePlaceholder, - wireframeProgress, - wireframeRadio, - wireframeRadioGroup, - wireframeRectangle, - wireframeScreenshot, - wireframeSelect, - wireframeStatusBar, - wireframeText, - wireframeToggle, -} from '../mobile.types' -import { makeNavigationBar, makeOpenKeyboardPlaceholder, makeStatusBar } from './screen-chrome' -import { ConversionContext, ConversionResult } from './types' -import { - asStyleString, - makeBodyStyles, - makeColorStyles, - makeDeterminateProgressStyles, - makeHTMLStyles, - makeIndeterminateProgressStyles, - makeMinimalStyles, - makePositionStyles, - makeStylesString, -} from './wireframeStyle' - -export const BACKGROUND = '#f3f4ef' -const FOREGROUND = '#35373e' - -/** - * generates a sequence of ids - * from 100 to 9,999,999 - * the transformer reserves ids in the range 0 to 9,999,999 - * we reserve a range of ids because we need nodes to have stable ids across snapshots - * in order for incremental snapshots to work - * some mobile elements have to be wrapped in other elements in order to be styled correctly - * which means the web version of a mobile replay will use ids that don't exist in the mobile replay, - * and we need to ensure they don't clash - * ----- - * id is typed as a number in rrweb - * and there's a few places in their code where rrweb uses a check for `id === -1` to bail out of processing - * so, it's safest to assume that id is expected to be a positive integer - */ -function* ids(): Generator<number> { - let i = 100 - while (i < 9999999) { - yield i++ - } -} - -let globalIdSequence = ids() - -// there are some fixed ids that we need to use for fixed elements or artificial mutations -const DOCUMENT_ID = 1 -const HTML_DOC_TYPE_ID = 2 -const HTML_ELEMENT_ID = 3 -const HEAD_ID = 4 -const BODY_ID = 5 -// the nav bar should always be the last item in the body so that it is at the top of the stack -const NAVIGATION_BAR_PARENT_ID = 7 -export const NAVIGATION_BAR_ID = 8 -// the keyboard so that it is still before the nav bar -const KEYBOARD_PARENT_ID = 9 -export const KEYBOARD_ID = 10 -export const STATUS_BAR_PARENT_ID = 11 -export const STATUS_BAR_ID = 12 - -function isKeyboardEvent(x: unknown): x is keyboardEvent { - return isObject(x) && 'data' in x && isObject(x.data) && 'tag' in x.data && x.data.tag === 'keyboard' -} - -export function _isPositiveInteger(id: unknown): id is number { - return typeof id === 'number' && id > 0 && id % 1 === 0 -} - -function 
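
The generator above hands out synthetic node ids starting at 100, which keeps them clear of the fixed ids (document, html, head, body, keyboard, status bar, navigation bar) declared just after it. A small usage sketch, assuming nothing beyond the generator itself:

```ts
// Usage sketch of the id sequence: synthetic ids start at 100, leaving values below 100
// reserved for the fixed document/body/keyboard/status-bar/navigation-bar elements.
function* sketchIds(): Generator<number> {
    let i = 100
    while (i < 9999999) {
        yield i++
    }
}

const seq = sketchIds()
seq.next().value // 100 - first synthetic node id
seq.next().value // 101 - monotonically increasing; the transformer restarts the sequence on each full snapshot
```
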
_isNullish(x: unknown): x is null | undefined { - return x === null || x === undefined -} - -function isRemovedNodeMutation(x: addedNodeMutation | removedNodeMutation): x is removedNodeMutation { - return isObject(x) && 'id' in x -} - -export const makeCustomEvent = ( - mobileCustomEvent: (customEvent | keyboardEvent) & { - timestamp: number - delay?: number - } -): (customEvent | incrementalSnapshotEvent) & { - timestamp: number - delay?: number -} => { - if (isKeyboardEvent(mobileCustomEvent)) { - // keyboard events are handled as incremental snapshots to add or remove a keyboard from the DOM - // TODO eventually we can pass something to makeIncrementalEvent here - const adds: addedNodeMutation[] = [] - const removes = [] - if (mobileCustomEvent.data.payload.open) { - const keyboardPlaceHolder = makeOpenKeyboardPlaceholder(mobileCustomEvent, { - timestamp: mobileCustomEvent.timestamp, - idSequence: globalIdSequence, - }) - if (keyboardPlaceHolder) { - adds.push({ - parentId: KEYBOARD_PARENT_ID, - nextId: null, - node: keyboardPlaceHolder.result, - }) - // mutations seem not to want a tree of nodes to add - // so even though `keyboardPlaceholder` is a tree with content - // we have to add the text content as well - adds.push({ - parentId: keyboardPlaceHolder.result.id, - nextId: null, - node: { - type: NodeType.Text, - id: globalIdSequence.next().value, - textContent: 'keyboard', - }, - }) - } else { - captureMessage('Failed to create keyboard placeholder', { extra: { mobileCustomEvent } }) - } - } else { - removes.push({ - parentId: KEYBOARD_PARENT_ID, - id: KEYBOARD_ID, - }) - } - const mutation: mutationData = { adds, attributes: [], removes, source: IncrementalSource.Mutation, texts: [] } - return { - type: EventType.IncrementalSnapshot, - data: mutation, - timestamp: mobileCustomEvent.timestamp, - } - } - return mobileCustomEvent -} - -export const makeMetaEvent = ( - mobileMetaEvent: MobileMetaEvent & { - timestamp: number - } -): metaEvent & { - timestamp: number - delay?: number -} => ({ - type: EventType.Meta, - data: { - href: mobileMetaEvent.data.href || '', // the replay doesn't use the href, so we safely ignore any absence - // mostly we need width and height in order to size the viewport - width: mobileMetaEvent.data.width, - height: mobileMetaEvent.data.height, - }, - timestamp: mobileMetaEvent.timestamp, -}) - -export function makeDivElement( - wireframe: wireframeDiv, - children: serializedNodeWithId[], - context: ConversionContext -): ConversionResult<serializedNodeWithId> | null { - const _id = _isPositiveInteger(wireframe.id) ? 
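
As the comments above note, keyboard custom events never reach the player as custom events; they are rewritten into mutation-style incremental snapshots that add or remove a placeholder under the fixed keyboard parent. An illustrative (not verbatim) example of the output for a keyboard-close event:

```ts
// Illustrative output shape for a { tag: 'keyboard', payload: { open: false } } custom event:
// a mutation that removes the keyboard placeholder from its fixed parent element.
const keyboardCloseAsIncremental = {
    type: 3, // EventType.IncrementalSnapshot
    timestamp: 1,
    data: {
        source: 0, // IncrementalSource.Mutation
        adds: [],
        texts: [],
        attributes: [],
        removes: [{ parentId: 9 /* KEYBOARD_PARENT_ID */, id: 10 /* KEYBOARD_ID */ }],
    },
}
```
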
wireframe.id : context.idSequence.next().value - return { - result: { - type: NodeType.Element, - tagName: 'div', - attributes: { - style: asStyleString([makeStylesString(wireframe), 'overflow:hidden', 'white-space:nowrap']), - 'data-rrweb-id': _id, - }, - id: _id, - childNodes: children, - }, - context, - } -} - -function makeTextElement( - wireframe: wireframeText, - children: serializedNodeWithId[], - context: ConversionContext -): ConversionResult<serializedNodeWithId> | null { - if (wireframe.type !== 'text') { - console.error('Passed incorrect wireframe type to makeTextElement') - return null - } - - // because we might have to style the text, we always wrap it in a div - // and apply styles to that - const id = context.idSequence.next().value - - const childNodes = [...children] - if (!_isNullish(wireframe.text)) { - childNodes.unshift({ - type: NodeType.Text, - textContent: wireframe.text, - // since the text node is wrapped, we assign it a synthetic id - id, - }) - } - - return { - result: { - type: NodeType.Element, - tagName: 'div', - attributes: { - style: asStyleString([makeStylesString(wireframe), 'overflow:hidden', 'white-space:normal']), - 'data-rrweb-id': wireframe.id, - }, - id: wireframe.id, - childNodes, - }, - context, - } -} - -function makeWebViewElement( - wireframe: wireframe, - children: serializedNodeWithId[], - context: ConversionContext -): ConversionResult<serializedNodeWithId> | null { - const labelledWireframe: wireframePlaceholder = { ...wireframe } as wireframePlaceholder - if ('url' in wireframe) { - labelledWireframe.label = wireframe.url - } - - return makePlaceholderElement(labelledWireframe, children, context) -} - -export function makePlaceholderElement( - wireframe: wireframe, - children: serializedNodeWithId[], - context: ConversionContext -): ConversionResult<serializedNodeWithId> | null { - const txt = 'label' in wireframe && wireframe.label ? 
wireframe.label : wireframe.type || 'PLACEHOLDER' - return { - result: { - type: NodeType.Element, - tagName: 'div', - attributes: { - style: makeStylesString(wireframe, { - verticalAlign: 'center', - horizontalAlign: 'center', - backgroundColor: wireframe.style?.backgroundColor || BACKGROUND, - color: wireframe.style?.color || FOREGROUND, - backgroundImage: PLACEHOLDER_SVG_DATA_IMAGE_URL, - backgroundSize: 'auto', - backgroundRepeat: 'unset', - ...context.styleOverride, - }), - 'data-rrweb-id': wireframe.id, - }, - id: wireframe.id, - childNodes: [ - { - type: NodeType.Text, - // since the text node is wrapped, we assign it a synthetic id - id: context.idSequence.next().value, - textContent: txt, - }, - ...children, - ], - }, - context, - } -} - -export function dataURIOrPNG(src: string): string { - // replace all new lines in src - src = src.replace(/\r?\n|\r/g, '') - if (!src.startsWith('data:image/')) { - return 'data:image/png;base64,' + src - } - return src -} - -function makeImageElement( - wireframe: wireframeImage | wireframeScreenshot, - children: serializedNodeWithId[], - context: ConversionContext -): ConversionResult<serializedNodeWithId> | null { - if (!wireframe.base64) { - return makePlaceholderElement(wireframe, children, context) - } - - const src = dataURIOrPNG(wireframe.base64) - return { - result: { - type: NodeType.Element, - tagName: 'img', - attributes: { - src: src, - width: wireframe.width, - height: wireframe.height, - style: makeStylesString(wireframe), - 'data-rrweb-id': wireframe.id, - }, - id: wireframe.id, - childNodes: children, - }, - context, - } -} - -function inputAttributes<T extends wireframeInputComponent>(wireframe: T): attributes { - const attributes = { - style: makeStylesString(wireframe), - type: wireframe.inputType, - ...(wireframe.disabled ? { disabled: wireframe.disabled } : {}), - 'data-rrweb-id': wireframe.id, - } - - switch (wireframe.inputType) { - case 'checkbox': - return { - ...attributes, - style: null, // checkboxes are styled by being combined with a label - ...(wireframe.checked ? { checked: wireframe.checked } : {}), - } - case 'toggle': - return { - ...attributes, - style: null, // toggle are styled by being combined with a label - ...(wireframe.checked ? { checked: wireframe.checked } : {}), - } - case 'radio': - return { - ...attributes, - style: null, // radio buttons are styled by being combined with a label - ...(wireframe.checked ? { checked: wireframe.checked } : {}), - // radio value defaults to the string "on" if not specified - // we're not really submitting the form, so it doesn't matter 🀞 - // radio name is used to correctly uncheck values when one is checked - // mobile doesn't really have it, and we will be checking based on snapshots, - // so we can ignore it for now - } - case 'button': - return { - ...attributes, - } - case 'text_area': - return { - ...attributes, - value: wireframe.value || '', - } - case 'progress': - return { - ...attributes, - // indeterminate when omitted - value: wireframe.value || null, - // defaults to 1 when omitted - max: wireframe.max || null, - type: null, // progress has no type attribute - } - default: - return { - ...attributes, - value: wireframe.value || '', - } - } -} - -function makeButtonElement( - wireframe: wireframeButton, - children: serializedNodeWithId[], - context: ConversionContext -): ConversionResult<serializedNodeWithId> | null { - const buttonText: textNode | null = wireframe.value - ? 
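
`dataURIOrPNG` above encodes a simple rule: strip stray newlines, assume bare base64 payloads are PNGs, and leave anything that already carries a `data:image/` prefix untouched. A standalone restatement with worked examples (illustrative name, not an import from the deleted module):

```ts
// Standalone restatement of the dataURIOrPNG rule above.
const asImageSrc = (src: string): string => {
    const cleaned = src.replace(/\r?\n|\r/g, '') // payloads can arrive with embedded newlines
    return cleaned.startsWith('data:image/')
        ? cleaned // already a data URI: pass through untouched
        : 'data:image/png;base64,' + cleaned // bare base64 is assumed to be a PNG
}

asImageSrc('iVBORw0KGgo')                 // 'data:image/png;base64,iVBORw0KGgo'
asImageSrc('data:image/jpeg;base64,/9j/') // unchanged
```
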
{ - type: NodeType.Text, - textContent: wireframe.value, - } - : null - - return { - result: { - type: NodeType.Element, - tagName: 'button', - attributes: inputAttributes(wireframe), - id: wireframe.id, - childNodes: buttonText ? [{ ...buttonText, id: context.idSequence.next().value }, ...children] : children, - }, - context, - } -} - -function makeSelectOptionElement( - option: string, - selected: boolean, - context: ConversionContext -): ConversionResult<serializedNodeWithId> { - const optionId = context.idSequence.next().value - return { - result: { - type: NodeType.Element, - tagName: 'option', - attributes: { - ...(selected ? { selected: selected } : {}), - 'data-rrweb-id': optionId, - }, - id: optionId, - childNodes: [ - { - type: NodeType.Text, - textContent: option, - id: context.idSequence.next().value, - }, - ], - }, - context, - } -} - -function makeSelectElement( - wireframe: wireframeSelect, - children: serializedNodeWithId[], - context: ConversionContext -): ConversionResult<serializedNodeWithId> | null { - const selectOptions: serializedNodeWithId[] = [] - if (wireframe.options) { - let optionContext = context - for (let i = 0; i < wireframe.options.length; i++) { - const option = wireframe.options[i] - const conversion = makeSelectOptionElement(option, wireframe.value === option, optionContext) - selectOptions.push(conversion.result) - optionContext = conversion.context - } - } - return { - result: { - type: NodeType.Element, - tagName: 'select', - attributes: inputAttributes(wireframe), - id: wireframe.id, - childNodes: [...selectOptions, ...children], - }, - context, - } -} - -function groupRadioButtons(children: serializedNodeWithId[], radioGroupName: string): serializedNodeWithId[] { - return children.map((child) => { - if (child.type === NodeType.Element && child.tagName === 'input' && child.attributes.type === 'radio') { - return { - ...child, - attributes: { - ...child.attributes, - name: radioGroupName, - 'data-rrweb-id': child.id, - }, - } - } - return child - }) -} - -function makeRadioGroupElement( - wireframe: wireframeRadioGroup, - children: serializedNodeWithId[], - context: ConversionContext -): ConversionResult<serializedNodeWithId> | null { - const radioGroupName = 'radio_group_' + wireframe.id - return { - result: { - type: NodeType.Element, - tagName: 'div', - attributes: { - style: makeStylesString(wireframe), - 'data-rrweb-id': wireframe.id, - }, - id: wireframe.id, - childNodes: groupRadioButtons(children, radioGroupName), - }, - context, - } -} - -function makeStar(title: string, path: string, context: ConversionContext): serializedNodeWithId { - const svgId = context.idSequence.next().value - const titleId = context.idSequence.next().value - const pathId = context.idSequence.next().value - return { - type: NodeType.Element, - tagName: 'svg', - isSVG: true, - attributes: { - style: asStyleString(['height: 100%', 'overflow-clip-margin: content-box', 'overflow:hidden']), - viewBox: '0 0 24 24', - fill: 'currentColor', - 'data-rrweb-id': svgId, - }, - id: svgId, - childNodes: [ - { - type: NodeType.Element, - tagName: 'title', - isSVG: true, - attributes: { - 'data-rrweb-id': titleId, - }, - id: titleId, - childNodes: [ - { - type: NodeType.Text, - textContent: title, - id: context.idSequence.next().value, - }, - ], - }, - { - type: NodeType.Element, - tagName: 'path', - isSVG: true, - attributes: { - d: path, - 'data-rrweb-id': pathId, - }, - id: pathId, - childNodes: [], - }, - ], - } -} - -function filledStar(context: ConversionContext): 
serializedNodeWithId { - return makeStar( - 'filled star', - 'M12,17.27L18.18,21L16.54,13.97L22,9.24L14.81,8.62L12,2L9.19,8.62L2,9.24L7.45,13.97L5.82,21L12,17.27Z', - context - ) -} - -function halfStar(context: ConversionContext): serializedNodeWithId { - return makeStar( - 'half-filled star', - 'M12,15.4V6.1L13.71,10.13L18.09,10.5L14.77,13.39L15.76,17.67M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z', - context - ) -} - -function emptyStar(context: ConversionContext): serializedNodeWithId { - return makeStar( - 'empty star', - 'M12,15.39L8.24,17.66L9.23,13.38L5.91,10.5L10.29,10.13L12,6.09L13.71,10.13L18.09,10.5L14.77,13.38L15.76,17.66M22,9.24L14.81,8.63L12,2L9.19,8.63L2,9.24L7.45,13.97L5.82,21L12,17.27L18.18,21L16.54,13.97L22,9.24Z', - context - ) -} - -function makeRatingBar( - wireframe: wireframeProgress, - children: serializedNodeWithId[], - context: ConversionContext -): ConversionResult<serializedNodeWithId> | null { - // max is the number of stars... and value is the number of stars to fill - - // deliberate double equals, because we want to allow null and undefined - if (wireframe.value == null || wireframe.max == null) { - return makePlaceholderElement(wireframe, children, context) - } - - const numberOfFilledStars = Math.floor(wireframe.value) - const numberOfHalfStars = wireframe.value - numberOfFilledStars > 0 ? 1 : 0 - const numberOfEmptyStars = wireframe.max - numberOfFilledStars - numberOfHalfStars - - const filledStars = Array(numberOfFilledStars) - .fill(undefined) - .map(() => filledStar(context)) - const halfStars = Array(numberOfHalfStars) - .fill(undefined) - .map(() => halfStar(context)) - const emptyStars = Array(numberOfEmptyStars) - .fill(undefined) - .map(() => emptyStar(context)) - - const ratingBarId = context.idSequence.next().value - const ratingBar = { - type: NodeType.Element, - tagName: 'div', - id: ratingBarId, - attributes: { - style: asStyleString([ - makeColorStyles(wireframe), - 'position: relative', - 'display: flex', - 'flex-direction: row', - 'padding: 2px 4px', - ]), - 'data-rrweb-id': ratingBarId, - }, - childNodes: [...filledStars, ...halfStars, ...emptyStars], - } as serializedNodeWithId - - return { - result: { - type: NodeType.Element, - tagName: 'div', - attributes: { - style: makeStylesString(wireframe), - 'data-rrweb-id': wireframe.id, - }, - id: wireframe.id, - childNodes: [ratingBar, ...children], - }, - context, - } -} - -function makeProgressElement( - wireframe: wireframeProgress, - children: serializedNodeWithId[], - context: ConversionContext -): ConversionResult<serializedNodeWithId> | null { - if (wireframe.style?.bar === 'circular') { - // value needs to be expressed as a number between 0 and 100 - const max = wireframe.max || 1 - let value = wireframe.value || null - if (_isPositiveInteger(value) && value <= max) { - value = (value / max) * 100 - } else { - value = null - } - - const styleOverride = { - color: wireframe.style?.color || FOREGROUND, - backgroundColor: wireframe.style?.backgroundColor || BACKGROUND, - } - - // if not _isPositiveInteger(value) then we render a spinner, - // so we need to add a style element with the spin keyframe - const stylingChildren: serializedNodeWithId[] = _isPositiveInteger(value) - ? 
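
The rating variant above turns `value`/`max` into a row of SVG stars: whole stars for the integer part, one half star for any remainder, and empty stars for the rest. Worked through as a standalone sketch for the `value: '6.5', max: '12'` fixture used in the progress-rating test:

```ts
// Star-count arithmetic from makeRatingBar, restated as a standalone sketch.
const starCounts = (value: number, max: number): { filled: number; half: number; empty: number } => {
    const filled = Math.floor(value)
    const half = value - filled > 0 ? 1 : 0
    return { filled, half, empty: max - filled - half }
}

starCounts(6.5, 12) // { filled: 6, half: 1, empty: 5 }
```
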
[] - : [ - { - type: NodeType.Element, - tagName: 'style', - attributes: { - type: 'text/css', - }, - id: context.idSequence.next().value, - childNodes: [ - { - type: NodeType.Text, - textContent: `@keyframes spin { 0% { transform: rotate(0deg); } 100% { transform: rotate(360deg); } }`, - id: context.idSequence.next().value, - }, - ], - }, - ] - - const wrappingDivId = context.idSequence.next().value - return { - result: { - type: NodeType.Element, - tagName: 'div', - attributes: { - style: makeMinimalStyles(wireframe), - 'data-rrweb-id': wireframe.id, - }, - id: wireframe.id, - childNodes: [ - { - type: NodeType.Element, - tagName: 'div', - attributes: { - // with no provided value we render a spinner - style: _isPositiveInteger(value) - ? makeDeterminateProgressStyles(wireframe, styleOverride) - : makeIndeterminateProgressStyles(wireframe, styleOverride), - 'data-rrweb-id': wrappingDivId, - }, - id: wrappingDivId, - childNodes: stylingChildren, - }, - ...children, - ], - }, - context, - } - } else if (wireframe.style?.bar === 'rating') { - return makeRatingBar(wireframe, children, context) - } - return { - result: { - type: NodeType.Element, - tagName: 'progress', - attributes: inputAttributes(wireframe), - id: wireframe.id, - childNodes: children, - }, - context, - } -} - -function makeToggleParts(wireframe: wireframeToggle, context: ConversionContext): serializedNodeWithId[] { - const togglePosition = wireframe.checked ? 'right' : 'left' - const defaultColor = wireframe.checked ? '#1d4aff' : BACKGROUND - const sliderPartId = context.idSequence.next().value - const handlePartId = context.idSequence.next().value - return [ - { - type: NodeType.Element, - tagName: 'div', - attributes: { - 'data-toggle-part': 'slider', - style: asStyleString([ - 'position:absolute', - 'top:33%', - 'left:5%', - 'display:inline-block', - 'width:75%', - 'height:33%', - 'opacity: 0.2', - 'border-radius:7.5%', - `background-color:${wireframe.style?.color || defaultColor}`, - ]), - 'data-rrweb-id': sliderPartId, - }, - id: sliderPartId, - childNodes: [], - }, - { - type: NodeType.Element, - tagName: 'div', - attributes: { - 'data-toggle-part': 'handle', - style: asStyleString([ - 'position:absolute', - 'top:1.5%', - `${togglePosition}:5%`, - 'display:flex', - 'align-items:center', - 'justify-content:center', - 'width:40%', - 'height:75%', - 'cursor:inherit', - 'border-radius:50%', - `background-color:${wireframe.style?.color || defaultColor}`, - `border:2px solid ${wireframe.style?.borderColor || wireframe.style?.color || defaultColor}`, - ]), - 'data-rrweb-id': handlePartId, - }, - id: handlePartId, - childNodes: [], - }, - ] -} - -function makeToggleElement( - wireframe: wireframeToggle, - context: ConversionContext -): ConversionResult< - elementNode & { - id: number - } -> | null { - const isLabelled = 'label' in wireframe - const wrappingDivId = context.idSequence.next().value - return { - result: { - type: NodeType.Element, - tagName: 'div', - attributes: { - // if labelled take up available space, otherwise use provided positioning - style: isLabelled ? 
asStyleString(['height:100%', 'flex:1']) : makePositionStyles(wireframe), - 'data-rrweb-id': wireframe.id, - }, - id: wireframe.id, - childNodes: [ - { - type: NodeType.Element, - tagName: 'div', - attributes: { - // relative position, fills parent - style: asStyleString(['position:relative', 'width:100%', 'height:100%']), - 'data-rrweb-id': wrappingDivId, - }, - id: wrappingDivId, - childNodes: makeToggleParts(wireframe, context), - }, - ], - }, - context, - } -} - -function makeLabelledInput( - wireframe: wireframeCheckBox | wireframeRadio | wireframeToggle, - theInputElement: serializedNodeWithId, - context: ConversionContext -): ConversionResult<serializedNodeWithId> { - const theLabel: serializedNodeWithId = { - type: NodeType.Text, - textContent: wireframe.label || '', - id: context.idSequence.next().value, - } - - const orderedChildren = wireframe.inputType === 'toggle' ? [theLabel, theInputElement] : [theInputElement, theLabel] - - const labelId = context.idSequence.next().value - return { - result: { - type: NodeType.Element, - tagName: 'label', - attributes: { - style: makeStylesString(wireframe), - 'data-rrweb-id': labelId, - }, - id: labelId, - childNodes: orderedChildren, - }, - context, - } -} - -function makeInputElement( - wireframe: wireframeInputComponent, - children: serializedNodeWithId[], - context: ConversionContext -): ConversionResult<serializedNodeWithId> | null { - if (!wireframe.inputType) { - return null - } - - if (wireframe.inputType === 'button') { - return makeButtonElement(wireframe, children, context) - } - - if (wireframe.inputType === 'select') { - return makeSelectElement(wireframe, children, context) - } - - if (wireframe.inputType === 'progress') { - return makeProgressElement(wireframe, children, context) - } - - const theInputElement: ConversionResult<serializedNodeWithId> | null = - wireframe.inputType === 'toggle' - ? makeToggleElement(wireframe, context) - : { - result: { - type: NodeType.Element, - tagName: 'input', - attributes: inputAttributes(wireframe), - id: wireframe.id, - childNodes: children, - }, - context, - } - - if (!theInputElement) { - return null - } - - if ('label' in wireframe) { - return makeLabelledInput(wireframe, theInputElement.result, theInputElement.context) - } - // when labelled no styles are needed, when un-labelled as here - we add the styling in. 
- ;(theInputElement.result as elementNode).attributes.style = makeStylesString(wireframe) - return theInputElement -} - -function makeRectangleElement( - wireframe: wireframeRectangle, - children: serializedNodeWithId[], - context: ConversionContext -): ConversionResult<serializedNodeWithId> | null { - return { - result: { - type: NodeType.Element, - tagName: 'div', - attributes: { - style: makeStylesString(wireframe), - 'data-rrweb-id': wireframe.id, - }, - id: wireframe.id, - childNodes: children, - }, - context, - } -} - -function chooseConverter<T extends wireframe>( - wireframe: T -): ( - wireframe: T, - children: serializedNodeWithId[], - context: ConversionContext -) => ConversionResult<serializedNodeWithId> | null { - // in theory type is always present - // but since this is coming over the wire we can't really be sure, - // and so we default to div - const converterType: MobileNodeType = wireframe.type || 'div' - const converterMapping: Record< - MobileNodeType, - (wireframe: T, children: serializedNodeWithId[]) => ConversionResult<serializedNodeWithId> | null - > = { - // KLUDGE: TS can't tell that the wireframe type of each function is safe based on the converter type - text: makeTextElement as any, - image: makeImageElement as any, - rectangle: makeRectangleElement as any, - div: makeDivElement as any, - input: makeInputElement as any, - radio_group: makeRadioGroupElement as any, - web_view: makeWebViewElement as any, - placeholder: makePlaceholderElement as any, - status_bar: makeStatusBar as any, - navigation_bar: makeNavigationBar as any, - screenshot: makeImageElement as any, - } - return converterMapping[converterType] -} - -function convertWireframe( - wireframe: wireframe, - context: ConversionContext -): ConversionResult<serializedNodeWithId> | null { - const children = convertWireframesFor(wireframe.childWireframes, context) - const converted = chooseConverter(wireframe)?.(wireframe, children.result, children.context) - return converted || null -} - -function convertWireframesFor( - wireframes: wireframe[] | undefined, - context: ConversionContext -): ConversionResult<serializedNodeWithId[]> { - if (!wireframes) { - return { result: [], context } - } - - const result: serializedNodeWithId[] = [] - for (const wireframe of wireframes) { - const converted = convertWireframe(wireframe, context) - if (converted) { - result.push(converted.result) - context = converted.context - } - } - return { result, context } -} - -function isMobileIncrementalSnapshotEvent(x: unknown): x is MobileIncrementalSnapshotEvent { - const isIncrementalSnapshot = isObject(x) && 'type' in x && x.type === EventType.IncrementalSnapshot - if (!isIncrementalSnapshot) { - return false - } - const hasData = isObject(x) && 'data' in x - const data = hasData ? x.data : null - - const hasMutationSource = isObject(data) && 'source' in data && data.source === IncrementalSource.Mutation - - const adds = isObject(data) && 'adds' in data && Array.isArray(data.adds) ? data.adds : null - const updates = isObject(data) && 'updates' in data && Array.isArray(data.updates) ? data.updates : null - - const hasUpdatedWireframe = !!updates && updates.length > 0 && isObject(updates[0]) && 'wireframe' in updates[0] - const hasAddedWireframe = !!adds && adds.length > 0 && isObject(adds[0]) && 'wireframe' in adds[0] - - return hasMutationSource && (hasAddedWireframe || hasUpdatedWireframe) -} - -function chooseParentId(nodeType: MobileNodeType, providedParentId: number): number { - return nodeType === 'screenshot' ? 
BODY_ID : providedParentId -} - -function makeIncrementalAdd(add: MobileNodeMutation, context: ConversionContext): addedNodeMutation[] | null { - const converted = convertWireframe(add.wireframe, context) - - if (!converted) { - return null - } - - const addition: addedNodeMutation = { - parentId: chooseParentId(add.wireframe.type, add.parentId), - nextId: null, - node: converted.result, - } - const adds: addedNodeMutation[] = [] - if (addition) { - const flattened = flattenMutationAdds(addition) - flattened.forEach((x) => adds.push(x)) - return adds - } - return null -} - -/** - * When processing an update we remove the entire item, and then add it back in. - */ -function makeIncrementalRemoveForUpdate(update: MobileNodeMutation): removedNodeMutation { - return { - parentId: chooseParentId(update.wireframe.type, update.parentId), - id: update.wireframe.id, - } -} - -function isNode(x: unknown): x is serializedNodeWithId { - // KLUDGE: really we should check that x.type is valid, but we're safe enough already - return isObject(x) && 'type' in x && 'id' in x -} - -function isNodeWithChildren(x: unknown): x is elementNode | documentNode { - return isNode(x) && 'childNodes' in x && Array.isArray(x.childNodes) -} - -/** - * when creating incremental adds we have to flatten the node tree structure - * there's no point, then keeping those child nodes in place - */ -function cloneWithoutChildren(converted: addedNodeMutation): addedNodeMutation { - const cloned = { ...converted } - const clonedNode: serializedNodeWithId = { ...converted.node } - if (isNodeWithChildren(clonedNode)) { - clonedNode.childNodes = [] - } - cloned.node = clonedNode - return cloned -} - -function flattenMutationAdds(converted: addedNodeMutation): addedNodeMutation[] { - const flattened: addedNodeMutation[] = [] - - flattened.push(cloneWithoutChildren(converted)) - - const node: unknown = converted.node - const newParentId = converted.node.id - if (isNodeWithChildren(node)) { - node.childNodes.forEach((child) => { - flattened.push( - cloneWithoutChildren({ - parentId: newParentId, - nextId: null, - node: child, - }) - ) - if (isNodeWithChildren(child)) { - flattened.push(...flattenMutationAdds({ parentId: newParentId, nextId: null, node: child })) - } - }) - } - return flattened -} - -/** - * each update wireframe carries the entire tree because we don't want to diff on the client - * that means that we might create multiple mutations for the same node - * we only want to add it once, so we dedupe the mutations - * the app guarantees that for a given ID that is present more than once in a single snapshot - * every instance of that ID is identical - * it might change in the next snapshot but for a single incremental snapshot there is one - * and only one version of any given ID - */ -function dedupeMutations<T extends addedNodeMutation | removedNodeMutation>(mutations: T[]): T[] { - // KLUDGE: it's slightly yucky to stringify everything but since synthetic nodes - // introduce a new id, we can't just compare the id - const seen = new Set<string>() - - // in case later mutations are the ones we want to keep, we reverse the array - // this does help with the deduping, so, it's likely that the view for a single ID - // is not consistent over a snapshot, but it's cheap to reverse so :YOLO: - return mutations - .reverse() - .filter((mutation: addedNodeMutation | removedNodeMutation) => { - let toCompare: string - if (isRemovedNodeMutation(mutation)) { - toCompare = JSON.stringify(mutation) - } else { - // if this is a synthetic 
addition, then we need to ignore the id, - // since duplicates won't have duplicate ids - toCompare = JSON.stringify({ - ...mutation.node, - id: 0, - }) - } - - if (seen.has(toCompare)) { - return false - } - seen.add(toCompare) - return true - }) - .reverse() -} - -/** - * We want to ensure that any events don't use id = 0. - * They must always represent a valid ID from the dom, so we swap in the body id when the id = 0. - * - * For "removes", we don't need to do anything, the id of the element to be removed remains valid. We won't try and remove other elements that we added during transformation in order to show that element. - * - * "adds" are converted from wireframes to nodes and converted to `incrementalSnapshotEvent.adds` - * - * "updates" are converted to a remove and an add. - * - */ -export const makeIncrementalEvent = ( - mobileEvent: (MobileIncrementalSnapshotEvent | incrementalSnapshotEvent) & { - timestamp: number - delay?: number - } -): incrementalSnapshotEvent & { - timestamp: number - delay?: number -} => { - const converted = mobileEvent as unknown as incrementalSnapshotEvent & { - timestamp: number - delay?: number - } - if ('id' in converted.data && converted.data.id === 0) { - converted.data.id = BODY_ID - } - - if (isMobileIncrementalSnapshotEvent(mobileEvent)) { - const adds: addedNodeMutation[] = [] - const removes: removedNodeMutation[] = mobileEvent.data.removes || [] - if ('adds' in mobileEvent.data && Array.isArray(mobileEvent.data.adds)) { - const addsContext = { - timestamp: mobileEvent.timestamp, - idSequence: globalIdSequence, - } - - mobileEvent.data.adds.forEach((add) => { - makeIncrementalAdd(add, addsContext)?.forEach((x) => adds.push(x)) - }) - } - if ('updates' in mobileEvent.data && Array.isArray(mobileEvent.data.updates)) { - const updatesContext = { - timestamp: mobileEvent.timestamp, - idSequence: globalIdSequence, - } - const updateAdditions: addedNodeMutation[] = [] - mobileEvent.data.updates.forEach((update) => { - const removal = makeIncrementalRemoveForUpdate(update) - if (removal) { - removes.push(removal) - } - makeIncrementalAdd(update, updatesContext)?.forEach((x) => updateAdditions.push(x)) - }) - dedupeMutations(updateAdditions).forEach((x) => adds.push(x)) - } - - converted.data = { - source: IncrementalSource.Mutation, - attributes: [], - texts: [], - adds: dedupeMutations(adds), - // TODO: this assumes that removes are processed before adds 🀞 - removes: dedupeMutations(removes), - } - } - - return converted -} - -function makeKeyboardParent(): serializedNodeWithId { - return { - type: NodeType.Element, - tagName: 'div', - attributes: { - 'data-render-reason': 'a fixed placeholder to contain the keyboard in the correct stacking position', - 'data-rrweb-id': KEYBOARD_PARENT_ID, - }, - id: KEYBOARD_PARENT_ID, - childNodes: [], - } -} - -function makeStatusBarNode( - statusBar: wireframeStatusBar | undefined, - context: ConversionContext -): serializedNodeWithId { - const childNodes = statusBar ? convertWireframesFor([statusBar], context).result : [] - return { - type: NodeType.Element, - tagName: 'div', - attributes: { - 'data-rrweb-id': STATUS_BAR_PARENT_ID, - }, - id: STATUS_BAR_PARENT_ID, - childNodes, - } -} - -function makeNavBarNode( - navigationBar: wireframeNavigationBar | undefined, - context: ConversionContext -): serializedNodeWithId { - const childNodes = navigationBar ? 
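
The id-swap described above is easiest to see with a concrete pair: an incremental event that points at id 0 (no real element id from the device) is retargeted at the synthetic body element so rrweb has a valid node to apply it to. Illustrative data only, mirroring the touch-event fixture used earlier in these tests:

```ts
// Before: a touch interaction reported against id 0 (no element id available on the device).
const touchOnUnknownElement = {
    type: 3,
    timestamp: 1701355473313,
    data: { id: 0, source: 2, type: 7, pointerType: 2, x: 523, y: 683 },
}
// After makeIncrementalEvent: data.id === 5 (BODY_ID); everything else is unchanged.
```
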
convertWireframesFor([navigationBar], context).result : [] - return { - type: NodeType.Element, - tagName: 'div', - attributes: { - 'data-rrweb-id': NAVIGATION_BAR_PARENT_ID, - }, - id: NAVIGATION_BAR_PARENT_ID, - childNodes, - } -} - -function stripBarsFromWireframe(wireframe: wireframe): { - wireframe: wireframe | undefined - statusBar: wireframeStatusBar | undefined - navBar: wireframeNavigationBar | undefined -} { - if (wireframe.type === 'status_bar') { - return { wireframe: undefined, statusBar: wireframe, navBar: undefined } - } else if (wireframe.type === 'navigation_bar') { - return { wireframe: undefined, statusBar: undefined, navBar: wireframe } - } - let statusBar: wireframeStatusBar | undefined - let navBar: wireframeNavigationBar | undefined - const wireframeToReturn: wireframe | undefined = { ...wireframe } - wireframeToReturn.childWireframes = [] - for (const child of wireframe.childWireframes || []) { - const { - wireframe: childWireframe, - statusBar: childStatusBar, - navBar: childNavBar, - } = stripBarsFromWireframe(child) - statusBar = statusBar || childStatusBar - navBar = navBar || childNavBar - if (childWireframe) { - wireframeToReturn.childWireframes.push(childWireframe) - } - } - return { wireframe: wireframeToReturn, statusBar, navBar } -} - -/** - * We want to be able to place the status bar and navigation bar in the correct stacking order. - * So, we lift them out of the tree, and return them separately. - */ -export function stripBarsFromWireframes(wireframes: wireframe[]): { - statusBar: wireframeStatusBar | undefined - navigationBar: wireframeNavigationBar | undefined - appNodes: wireframe[] -} { - let statusBar: wireframeStatusBar | undefined - let navigationBar: wireframeNavigationBar | undefined - const copiedNodes: wireframe[] = [] - - wireframes.forEach((w) => { - const matches = stripBarsFromWireframe(w) - if (matches.statusBar) { - statusBar = matches.statusBar - } - if (matches.navBar) { - navigationBar = matches.navBar - } - if (matches.wireframe) { - copiedNodes.push(matches.wireframe) - } - }) - return { statusBar, navigationBar, appNodes: copiedNodes } -} - -export const makeFullEvent = ( - mobileEvent: MobileFullSnapshotEvent & { - timestamp: number - delay?: number - } -): fullSnapshotEvent & { - timestamp: number - delay?: number -} => { - // we can restart the id sequence on each full snapshot - globalIdSequence = ids() - - if (!('wireframes' in mobileEvent.data)) { - return mobileEvent as unknown as fullSnapshotEvent & { - timestamp: number - delay?: number - } - } - - const conversionContext = { - timestamp: mobileEvent.timestamp, - idSequence: globalIdSequence, - } - - const { statusBar, navigationBar, appNodes } = stripBarsFromWireframes(mobileEvent.data.wireframes) - - const nodeGroups = { - appNodes: convertWireframesFor(appNodes, conversionContext).result || [], - statusBarNode: makeStatusBarNode(statusBar, conversionContext), - navBarNode: makeNavBarNode(navigationBar, conversionContext), - } - - return { - type: EventType.FullSnapshot, - timestamp: mobileEvent.timestamp, - data: { - node: { - type: NodeType.Document, - childNodes: [ - { - type: NodeType.DocumentType, - name: 'html', - publicId: '', - systemId: '', - id: HTML_DOC_TYPE_ID, - }, - { - type: NodeType.Element, - tagName: 'html', - attributes: { style: makeHTMLStyles(), 'data-rrweb-id': HTML_ELEMENT_ID }, - id: HTML_ELEMENT_ID, - childNodes: [ - { - type: NodeType.Element, - tagName: 'head', - attributes: { 'data-rrweb-id': HEAD_ID }, - id: HEAD_ID, - childNodes: 
[makeCSSReset(conversionContext)], - }, - { - type: NodeType.Element, - tagName: 'body', - attributes: { style: makeBodyStyles(), 'data-rrweb-id': BODY_ID }, - id: BODY_ID, - childNodes: [ - // in the order they should stack if they ever clash - // lower is higher in the stacking context - ...nodeGroups.appNodes, - makeKeyboardParent(), - nodeGroups.navBarNode, - nodeGroups.statusBarNode, - ], - }, - ], - }, - ], - id: DOCUMENT_ID, - }, - initialOffset: { - top: 0, - left: 0, - }, - }, - } -} - -function makeCSSReset(context: ConversionContext): serializedNodeWithId { - // we need to normalize CSS so browsers don't do unexpected things - return { - type: NodeType.Element, - tagName: 'style', - attributes: { - type: 'text/css', - }, - id: context.idSequence.next().value, - childNodes: [ - { - type: NodeType.Text, - textContent: ` - body { - margin: unset; - } - input, button, select, textarea { - font: inherit; - margin: 0; - padding: 0; - border: 0; - outline: 0; - background: transparent; - padding-block: 0 !important; - } - .input:focus { - outline: none; - } - img { - border-style: none; - } - `, - id: context.idSequence.next().value, - }, - ], - } -} diff --git a/ee/frontend/mobile-replay/transformer/types.ts b/ee/frontend/mobile-replay/transformer/types.ts deleted file mode 100644 index 3ba93d6fc2..0000000000 --- a/ee/frontend/mobile-replay/transformer/types.ts +++ /dev/null @@ -1,17 +0,0 @@ -import { MobileStyles } from '../mobile.types' - -export interface ConversionResult<T> { - result: T - context: ConversionContext -} - -export interface ConversionContext { - timestamp: number - idSequence: Generator<number> - styleOverride?: StyleOverride -} - -// StyleOverride is defined here and not in the schema -// because these are overrides that the transformer is allowed to make -// not that clients are allowed to request -export type StyleOverride = MobileStyles & { bottom?: true; backgroundRepeat?: 'no-repeat' | 'unset' } diff --git a/ee/frontend/mobile-replay/transformer/wireframeStyle.ts b/ee/frontend/mobile-replay/transformer/wireframeStyle.ts deleted file mode 100644 index 1719060589..0000000000 --- a/ee/frontend/mobile-replay/transformer/wireframeStyle.ts +++ /dev/null @@ -1,269 +0,0 @@ -import { wireframe, wireframeProgress } from '../mobile.types' -import { dataURIOrPNG } from './transformers' -import { StyleOverride } from './types' - -function ensureTrailingSemicolon(styles: string): string { - return styles.endsWith(';') ? styles : styles + ';' -} - -function stripTrailingSemicolon(styles: string): string { - return styles.endsWith(';') ? styles.slice(0, -1) : styles -} - -export function asStyleString(styleParts: string[]): string { - if (styleParts.length === 0) { - return '' - } - return ensureTrailingSemicolon( - styleParts - .map(stripTrailingSemicolon) - .filter((x) => !!x) - .join(';') - ) -} - -function isNumber(candidate: unknown): candidate is number { - return typeof candidate === 'number' -} - -function isString(candidate: unknown): candidate is string { - return typeof candidate === 'string' -} - -function isUnitLike(candidate: unknown): candidate is string | number { - return isNumber(candidate) || (isString(candidate) && candidate.length > 0) -} - -function ensureUnit(value: string | number): string { - return isNumber(value) ? 
`${value}px` : value.replace(/px$/g, '') + 'px' -} - -function makeBorderStyles(wireframe: wireframe, styleOverride?: StyleOverride): string { - const styleParts: string[] = [] - - const combinedStyles = { - ...wireframe.style, - ...styleOverride, - } - - if (isUnitLike(combinedStyles.borderWidth)) { - const borderWidth = ensureUnit(combinedStyles.borderWidth) - styleParts.push(`border-width: ${borderWidth}`) - } - if (isUnitLike(combinedStyles.borderRadius)) { - const borderRadius = ensureUnit(combinedStyles.borderRadius) - styleParts.push(`border-radius: ${borderRadius}`) - } - if (combinedStyles?.borderColor) { - styleParts.push(`border-color: ${combinedStyles.borderColor}`) - } - - if (styleParts.length > 0) { - styleParts.push(`border-style: solid`) - } - - return asStyleString(styleParts) -} - -export function makeDimensionStyles(wireframe: wireframe): string { - const styleParts: string[] = [] - - if (wireframe.width === '100vw') { - styleParts.push(`width: 100vw`) - } else if (isNumber(wireframe.width)) { - styleParts.push(`width: ${ensureUnit(wireframe.width)}`) - } - - if (isNumber(wireframe.height)) { - styleParts.push(`height: ${ensureUnit(wireframe.height)}`) - } - - return asStyleString(styleParts) -} - -export function makePositionStyles(wireframe: wireframe, styleOverride?: StyleOverride): string { - const styleParts: string[] = [] - - styleParts.push(makeDimensionStyles(wireframe)) - - if (styleOverride?.bottom) { - styleParts.push(`bottom: 0`) - styleParts.push(`position: fixed`) - } else { - const posX = wireframe.x || 0 - const posY = wireframe.y || 0 - if (isNumber(posX) || isNumber(posY)) { - styleParts.push(`position: fixed`) - if (isNumber(posX)) { - styleParts.push(`left: ${ensureUnit(posX)}`) - } - if (isNumber(posY)) { - styleParts.push(`top: ${ensureUnit(posY)}`) - } - } - } - - if (styleOverride?.['z-index']) { - styleParts.push(`z-index: ${styleOverride['z-index']}`) - } - - return asStyleString(styleParts) -} - -function makeLayoutStyles(wireframe: wireframe, styleOverride?: StyleOverride): string { - const styleParts: string[] = [] - - const combinedStyles = { - ...wireframe.style, - ...styleOverride, - } - - if (combinedStyles.verticalAlign) { - styleParts.push( - `align-items: ${{ top: 'flex-start', center: 'center', bottom: 'flex-end' }[combinedStyles.verticalAlign]}` - ) - } - if (combinedStyles.horizontalAlign) { - styleParts.push( - `justify-content: ${ - { left: 'flex-start', center: 'center', right: 'flex-end' }[combinedStyles.horizontalAlign] - }` - ) - } - - if (styleParts.length) { - styleParts.push(`display: flex`) - } - - if (isUnitLike(combinedStyles.paddingLeft)) { - styleParts.push(`padding-left: ${ensureUnit(combinedStyles.paddingLeft)}`) - } - if (isUnitLike(combinedStyles.paddingRight)) { - styleParts.push(`padding-right: ${ensureUnit(combinedStyles.paddingRight)}`) - } - if (isUnitLike(combinedStyles.paddingTop)) { - styleParts.push(`padding-top: ${ensureUnit(combinedStyles.paddingTop)}`) - } - if (isUnitLike(combinedStyles.paddingBottom)) { - styleParts.push(`padding-bottom: ${ensureUnit(combinedStyles.paddingBottom)}`) - } - - return asStyleString(styleParts) -} - -function makeFontStyles(wireframe: wireframe, styleOverride?: StyleOverride): string { - const styleParts: string[] = [] - - const combinedStyles = { - ...wireframe.style, - ...styleOverride, - } - - if (isUnitLike(combinedStyles.fontSize)) { - styleParts.push(`font-size: ${ensureUnit(combinedStyles?.fontSize)}`) - } - - if (combinedStyles.fontFamily) { - 
styleParts.push(`font-family: ${combinedStyles.fontFamily}`) - } - - return asStyleString(styleParts) -} - -export function makeIndeterminateProgressStyles(wireframe: wireframeProgress, styleOverride?: StyleOverride): string { - const combinedStyles = { - ...wireframe.style, - ...styleOverride, - } - - return asStyleString([ - makeBackgroundStyles(wireframe, styleOverride), - makePositionStyles(wireframe), - `border: 4px solid ${combinedStyles.borderColor || combinedStyles.color || 'transparent'};`, - `border-radius: 50%;border-top: 4px solid #fff;`, - `animation: spin 2s linear infinite;`, - ]) -} - -export function makeDeterminateProgressStyles(wireframe: wireframeProgress, styleOverride?: StyleOverride): string { - const combinedStyles = { - ...wireframe.style, - ...styleOverride, - } - - const radialGradient = `radial-gradient(closest-side, white 80%, transparent 0 99.9%, white 0)` - const conicGradient = `conic-gradient(${combinedStyles.color || 'black'} calc(${wireframe.value} * 1%), ${ - combinedStyles.backgroundColor - } 0)` - - return asStyleString([ - makeBackgroundStyles(wireframe, styleOverride), - makePositionStyles(wireframe), - 'border-radius: 50%', - - `background: ${radialGradient}, ${conicGradient}`, - ]) -} - -/** - * normally use makeStylesString instead, but sometimes you need styles without any colors applied - * */ -export function makeMinimalStyles(wireframe: wireframe, styleOverride?: StyleOverride): string { - return asStyleString([ - makePositionStyles(wireframe, styleOverride), - makeLayoutStyles(wireframe, styleOverride), - makeFontStyles(wireframe, styleOverride), - ]) -} - -export function makeBackgroundStyles(wireframe: wireframe, styleOverride?: StyleOverride): string { - let styleParts: string[] = [] - - const combinedStyles = { - ...wireframe.style, - ...styleOverride, - } - - if (combinedStyles.backgroundColor) { - styleParts.push(`background-color: ${combinedStyles.backgroundColor}`) - } - - if (combinedStyles.backgroundImage) { - const backgroundImageURL = combinedStyles.backgroundImage.startsWith('url(') - ? 
combinedStyles.backgroundImage - : `url('${dataURIOrPNG(combinedStyles.backgroundImage)}')` - styleParts = styleParts.concat([ - `background-image: ${backgroundImageURL}`, - `background-size: ${combinedStyles.backgroundSize || 'contain'}`, - `background-repeat: ${combinedStyles.backgroundRepeat || 'no-repeat'}`, - ]) - } - - return asStyleString(styleParts) -} - -export function makeColorStyles(wireframe: wireframe, styleOverride?: StyleOverride): string { - const combinedStyles = { - ...wireframe.style, - ...styleOverride, - } - - const styleParts = [makeBackgroundStyles(wireframe, styleOverride), makeBorderStyles(wireframe, styleOverride)] - if (combinedStyles.color) { - styleParts.push(`color: ${combinedStyles.color}`) - } - - return asStyleString(styleParts) -} - -export function makeStylesString(wireframe: wireframe, styleOverride?: StyleOverride): string { - return asStyleString([makeColorStyles(wireframe, styleOverride), makeMinimalStyles(wireframe, styleOverride)]) -} - -export function makeHTMLStyles(): string { - return 'height: 100vh; width: 100vw;' -} - -export function makeBodyStyles(): string { - return 'height: 100vh; width: 100vw;' -} diff --git a/ee/hogai/__init__.py b/ee/hogai/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/assistant.py b/ee/hogai/assistant.py deleted file mode 100644 index 5ea6be9f0b..0000000000 --- a/ee/hogai/assistant.py +++ /dev/null @@ -1,314 +0,0 @@ -import json -from collections.abc import Generator, Iterator -from typing import Any, Optional, cast -from uuid import uuid4 - -from langchain_core.callbacks.base import BaseCallbackHandler -from langchain_core.messages import AIMessageChunk -from langchain_core.runnables.config import RunnableConfig -from langgraph.graph.state import CompiledStateGraph -from posthoganalytics.ai.langchain.callbacks import CallbackHandler -from pydantic import BaseModel - -from ee.hogai.funnels.nodes import FunnelGeneratorNode -from ee.hogai.graph import AssistantGraph -from ee.hogai.memory.nodes import MemoryInitializerNode -from ee.hogai.retention.nodes import RetentionGeneratorNode -from ee.hogai.schema_generator.nodes import SchemaGeneratorNode -from ee.hogai.trends.nodes import TrendsGeneratorNode -from ee.hogai.utils.asgi import SyncIterableToAsync -from ee.hogai.utils.state import ( - GraphMessageUpdateTuple, - GraphTaskStartedUpdateTuple, - GraphValueUpdateTuple, - is_message_update, - is_state_update, - is_task_started_update, - is_value_update, - validate_state_update, - validate_value_update, -) -from ee.hogai.utils.types import AssistantNodeName, AssistantState, PartialAssistantState -from ee.models import Conversation -from posthog.event_usage import report_user_action -from posthog.models import Team, User -from posthog.ph_client import get_ph_client -from posthog.schema import ( - AssistantEventType, - AssistantGenerationStatusEvent, - AssistantGenerationStatusType, - AssistantMessage, - FailureMessage, - HumanMessage, - ReasoningMessage, - VisualizationMessage, -) -from posthog.settings import SERVER_GATEWAY_INTERFACE - -posthog_client = get_ph_client() - -VISUALIZATION_NODES: dict[AssistantNodeName, type[SchemaGeneratorNode]] = { - AssistantNodeName.TRENDS_GENERATOR: TrendsGeneratorNode, - AssistantNodeName.FUNNEL_GENERATOR: FunnelGeneratorNode, - AssistantNodeName.RETENTION_GENERATOR: RetentionGeneratorNode, -} - -STREAMING_NODES: set[AssistantNodeName] = { - AssistantNodeName.MEMORY_ONBOARDING, - AssistantNodeName.MEMORY_INITIALIZER, - 
AssistantNodeName.SUMMARIZER, -} -"""Nodes that can stream messages to the client.""" - - -VERBOSE_NODES = STREAMING_NODES | {AssistantNodeName.MEMORY_INITIALIZER_INTERRUPT} -"""Nodes that can send messages to the client.""" - - -class Assistant: - _team: Team - _graph: CompiledStateGraph - _user: Optional[User] - _conversation: Conversation - _latest_message: HumanMessage - _state: Optional[AssistantState] - _callback_handler: Optional[BaseCallbackHandler] - - def __init__( - self, - team: Team, - conversation: Conversation, - new_message: HumanMessage, - user: Optional[User] = None, - is_new_conversation: bool = False, - ): - self._team = team - self._user = user - self._conversation = conversation - self._latest_message = new_message.model_copy(deep=True, update={"id": str(uuid4())}) - self._is_new_conversation = is_new_conversation - self._graph = AssistantGraph(team).compile_full_graph() - self._chunks = AIMessageChunk(content="") - self._state = None - distinct_id = user.distinct_id if user else None - self._callback_handler = ( - CallbackHandler( - posthog_client, - distinct_id, - properties={ - "conversation_id": str(self._conversation.id), - "is_first_conversation": is_new_conversation, - }, - ) - if posthog_client - else None - ) - - def stream(self): - if SERVER_GATEWAY_INTERFACE == "ASGI": - return self._astream() - return self._stream() - - def _astream(self): - return SyncIterableToAsync(self._stream()) - - def _stream(self) -> Generator[str, None, None]: - state = self._init_or_update_state() - config = self._get_config() - - generator: Iterator[Any] = self._graph.stream( - state, config=config, stream_mode=["messages", "values", "updates", "debug"] - ) - - # Assign the conversation id to the client. - if self._is_new_conversation: - yield self._serialize_conversation() - - # Send the last message with the initialized id. - yield self._serialize_message(self._latest_message) - - try: - last_viz_message = None - for update in generator: - if message := self._process_update(update): - if isinstance(message, VisualizationMessage): - last_viz_message = message - yield self._serialize_message(message) - - # Check if the assistant has requested help. 
- state = self._graph.get_state(config) - if state.next: - interrupt_value = state.tasks[0].interrupts[0].value - yield self._serialize_message( - AssistantMessage(content=interrupt_value, id=str(uuid4())) - if isinstance(interrupt_value, str) - else interrupt_value - ) - else: - self._report_conversation_state(last_viz_message) - except: - # This is an unhandled error, so we just stop further generation at this point - yield self._serialize_message(FailureMessage()) - raise # Re-raise, so that the error is printed or goes into Sentry - - @property - def _initial_state(self) -> AssistantState: - return AssistantState(messages=[self._latest_message], start_id=self._latest_message.id) - - def _get_config(self) -> RunnableConfig: - callbacks = [self._callback_handler] if self._callback_handler else None - config: RunnableConfig = { - "recursion_limit": 24, - "callbacks": callbacks, - "configurable": {"thread_id": self._conversation.id}, - } - return config - - def _init_or_update_state(self): - config = self._get_config() - snapshot = self._graph.get_state(config) - if snapshot.next: - saved_state = validate_state_update(snapshot.values) - self._state = saved_state - self._graph.update_state(config, PartialAssistantState(messages=[self._latest_message], resumed=True)) - - return None - initial_state = self._initial_state - self._state = initial_state - return initial_state - - def _node_to_reasoning_message( - self, node_name: AssistantNodeName, input: AssistantState - ) -> Optional[ReasoningMessage]: - match node_name: - case AssistantNodeName.ROUTER: - return ReasoningMessage(content="Identifying type of analysis") - case ( - AssistantNodeName.TRENDS_PLANNER - | AssistantNodeName.TRENDS_PLANNER_TOOLS - | AssistantNodeName.FUNNEL_PLANNER - | AssistantNodeName.FUNNEL_PLANNER_TOOLS - | AssistantNodeName.RETENTION_PLANNER - | AssistantNodeName.RETENTION_PLANNER_TOOLS - ): - substeps: list[str] = [] - if input: - if intermediate_steps := input.intermediate_steps: - for action, _ in intermediate_steps: - match action.tool: - case "retrieve_event_properties": - substeps.append(f"Exploring `{action.tool_input}` event's properties") - case "retrieve_entity_properties": - substeps.append(f"Exploring {action.tool_input} properties") - case "retrieve_event_property_values": - assert isinstance(action.tool_input, dict) - substeps.append( - f"Analyzing `{action.tool_input['property_name']}` event's property `{action.tool_input['event_name']}`" - ) - case "retrieve_entity_property_values": - assert isinstance(action.tool_input, dict) - substeps.append( - f"Analyzing {action.tool_input['entity']} property `{action.tool_input['property_name']}`" - ) - return ReasoningMessage(content="Picking relevant events and properties", substeps=substeps) - case AssistantNodeName.TRENDS_GENERATOR: - return ReasoningMessage(content="Creating trends query") - case AssistantNodeName.FUNNEL_GENERATOR: - return ReasoningMessage(content="Creating funnel query") - case AssistantNodeName.RETENTION_GENERATOR: - return ReasoningMessage(content="Creating retention query") - case _: - return None - - def _process_update(self, update: Any) -> BaseModel | None: - if is_state_update(update): - _, new_state = update - self._state = validate_state_update(new_state) - elif is_value_update(update) and (new_message := self._process_value_update(update)): - return new_message - elif is_message_update(update) and (new_message := self._process_message_update(update)): - return new_message - elif is_task_started_update(update) and (new_message 
:= self._process_task_started_update(update)): - return new_message - return None - - def _process_value_update(self, update: GraphValueUpdateTuple) -> BaseModel | None: - _, maybe_state_update = update - state_update = validate_value_update(maybe_state_update) - - if node_val := state_update.get(AssistantNodeName.ROUTER): - if isinstance(node_val, PartialAssistantState) and node_val.messages: - return node_val.messages[0] - elif intersected_nodes := state_update.keys() & VISUALIZATION_NODES.keys(): - # Reset chunks when schema validation fails. - self._chunks = AIMessageChunk(content="") - - node_name = intersected_nodes.pop() - node_val = state_update[node_name] - if not isinstance(node_val, PartialAssistantState): - return None - if node_val.messages: - return node_val.messages[0] - elif node_val.intermediate_steps: - return AssistantGenerationStatusEvent(type=AssistantGenerationStatusType.GENERATION_ERROR) - - for node_name in VERBOSE_NODES: - if node_val := state_update.get(node_name): - if isinstance(node_val, PartialAssistantState) and node_val.messages: - self._chunks = AIMessageChunk(content="") - return node_val.messages[0] - - return None - - def _process_message_update(self, update: GraphMessageUpdateTuple) -> BaseModel | None: - langchain_message, langgraph_state = update[1] - if isinstance(langchain_message, AIMessageChunk): - node_name = langgraph_state["langgraph_node"] - if node_name in VISUALIZATION_NODES.keys(): - self._chunks += langchain_message # type: ignore - parsed_message = VISUALIZATION_NODES[node_name].parse_output(self._chunks.tool_calls[0]["args"]) - if parsed_message: - initiator_id = self._state.start_id if self._state is not None else None - return VisualizationMessage(answer=parsed_message.query, initiator=initiator_id) - elif node_name in STREAMING_NODES: - self._chunks += langchain_message # type: ignore - if node_name == AssistantNodeName.MEMORY_INITIALIZER: - if not MemoryInitializerNode.should_process_message_chunk(langchain_message): - return None - else: - return AssistantMessage( - content=MemoryInitializerNode.format_message(cast(str, self._chunks.content)) - ) - return AssistantMessage(content=self._chunks.content) - return None - - def _process_task_started_update(self, update: GraphTaskStartedUpdateTuple) -> BaseModel | None: - _, task_update = update - node_name = task_update["payload"]["name"] # type: ignore - node_input = task_update["payload"]["input"] # type: ignore - if reasoning_message := self._node_to_reasoning_message(node_name, node_input): - return reasoning_message - return None - - def _serialize_message(self, message: BaseModel) -> str: - output = "" - if isinstance(message, AssistantGenerationStatusEvent): - output += f"event: {AssistantEventType.STATUS}\n" - else: - output += f"event: {AssistantEventType.MESSAGE}\n" - return output + f"data: {message.model_dump_json(exclude_none=True)}\n\n" - - def _serialize_conversation(self) -> str: - output = f"event: {AssistantEventType.CONVERSATION}\n" - json_conversation = json.dumps({"id": str(self._conversation.id)}) - output += f"data: {json_conversation}\n\n" - return output - - def _report_conversation_state(self, message: Optional[VisualizationMessage]): - human_message = self._latest_message - if self._user and message: - report_user_action( - self._user, - "chat with ai", - {"prompt": human_message.content, "response": message.model_dump_json(exclude_none=True)}, - ) diff --git a/ee/hogai/django_checkpoint/__init__.py b/ee/hogai/django_checkpoint/__init__.py deleted file mode 
100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/django_checkpoint/checkpointer.py b/ee/hogai/django_checkpoint/checkpointer.py deleted file mode 100644 index a57140fecd..0000000000 --- a/ee/hogai/django_checkpoint/checkpointer.py +++ /dev/null @@ -1,312 +0,0 @@ -import json -import random -import threading -from collections.abc import Iterable, Iterator, Sequence -from typing import Any, Optional, cast - -from django.db import transaction -from django.db.models import Q -from langchain_core.runnables import RunnableConfig -from langgraph.checkpoint.base import ( - WRITES_IDX_MAP, - BaseCheckpointSaver, - ChannelVersions, - Checkpoint, - CheckpointMetadata, - CheckpointTuple, - PendingWrite, - get_checkpoint_id, -) -from langgraph.checkpoint.serde.jsonplus import JsonPlusSerializer -from langgraph.checkpoint.serde.types import ChannelProtocol - -from ee.models.assistant import ConversationCheckpoint, ConversationCheckpointBlob, ConversationCheckpointWrite - - -class DjangoCheckpointer(BaseCheckpointSaver[str]): - jsonplus_serde = JsonPlusSerializer() - _lock: threading.Lock - - def __init__(self, *args): - super().__init__(*args) - self._lock = threading.Lock() - - def _load_writes(self, writes: Sequence[ConversationCheckpointWrite]) -> list[PendingWrite]: - return ( - [ - ( - str(checkpoint_write.task_id), - checkpoint_write.channel, - self.serde.loads_typed((checkpoint_write.type, checkpoint_write.blob)), - ) - for checkpoint_write in writes - if checkpoint_write.type is not None and checkpoint_write.blob is not None - ] - if writes - else [] - ) - - def _load_json(self, obj: Any): - return self.jsonplus_serde.loads(self.jsonplus_serde.dumps(obj)) - - def _dump_json(self, obj: Any) -> dict[str, Any]: - serialized_metadata = self.jsonplus_serde.dumps(obj) - # NOTE: we're using JSON serializer (not msgpack), so we need to remove null characters before writing - nulls_removed = serialized_metadata.decode().replace("\\u0000", "") - return json.loads(nulls_removed) - - def _get_checkpoint_qs( - self, - config: Optional[RunnableConfig], - filter: Optional[dict[str, Any]], - before: Optional[RunnableConfig], - ): - query = Q() - - # construct predicate for config filter - if config and "configurable" in config: - thread_id = config["configurable"].get("thread_id") - query &= Q(thread_id=thread_id) - checkpoint_ns = config["configurable"].get("checkpoint_ns") - if checkpoint_ns is not None: - query &= Q(checkpoint_ns=checkpoint_ns) - if checkpoint_id := get_checkpoint_id(config): - query &= Q(id=checkpoint_id) - - # construct predicate for metadata filter - if filter: - query &= Q(metadata__contains=filter) - - # construct predicate for `before` - if before is not None: - query &= Q(id__lt=get_checkpoint_id(before)) - - return ConversationCheckpoint.objects.filter(query).order_by("-id") - - def _get_checkpoint_channel_values( - self, checkpoint: ConversationCheckpoint - ) -> Iterable[ConversationCheckpointBlob]: - if not checkpoint.checkpoint: - return [] - loaded_checkpoint = self._load_json(checkpoint.checkpoint) - if "channel_versions" not in loaded_checkpoint: - return [] - query = Q() - for channel, version in loaded_checkpoint["channel_versions"].items(): - query |= Q(channel=channel, version=version) - return ConversationCheckpointBlob.objects.filter( - Q(thread_id=checkpoint.thread_id, checkpoint_ns=checkpoint.checkpoint_ns) & query - ) - - def list( - self, - config: Optional[RunnableConfig], - *, - filter: Optional[dict[str, Any]] = None, - before: Optional[RunnableConfig] = 
None, - limit: Optional[int] = None, - ) -> Iterator[CheckpointTuple]: - """List checkpoints from the database. - - This method retrieves a list of checkpoint tuples from the Postgres database based - on the provided config. The checkpoints are ordered by checkpoint ID in descending order (newest first). - - Args: - config (RunnableConfig): The config to use for listing the checkpoints. - filter (Optional[Dict[str, Any]]): Additional filtering criteria for metadata. Defaults to None. - before (Optional[RunnableConfig]): If provided, only checkpoints before the specified checkpoint ID are returned. Defaults to None. - limit (Optional[int]): The maximum number of checkpoints to return. Defaults to None. - - Yields: - Iterator[CheckpointTuple]: An iterator of checkpoint tuples. - """ - qs = self._get_checkpoint_qs(config, filter, before) - if limit: - qs = qs[:limit] - - for checkpoint in qs: - channel_values = self._get_checkpoint_channel_values(checkpoint) - loaded_checkpoint: Checkpoint = self._load_json(checkpoint.checkpoint) - - checkpoint_dict: Checkpoint = { - **loaded_checkpoint, - "pending_sends": [ - self.serde.loads_typed((checkpoint_write.type, checkpoint_write.blob)) - for checkpoint_write in checkpoint.pending_sends - ], - "channel_values": { - checkpoint_blob.channel: self.serde.loads_typed((checkpoint_blob.type, checkpoint_blob.blob)) - for checkpoint_blob in channel_values - if checkpoint_blob.type is not None - and checkpoint_blob.type != "empty" - and checkpoint_blob.blob is not None - }, - } - - yield CheckpointTuple( - { - "configurable": { - "thread_id": checkpoint.thread_id, - "checkpoint_ns": checkpoint.checkpoint_ns, - "checkpoint_id": checkpoint.id, - } - }, - checkpoint_dict, - self._load_json(checkpoint.metadata), - ( - { - "configurable": { - "thread_id": checkpoint.thread_id, - "checkpoint_ns": checkpoint.checkpoint_ns, - "checkpoint_id": checkpoint.parent_checkpoint_id, - } - } - if checkpoint.parent_checkpoint - else None - ), - self._load_writes(checkpoint.pending_writes), - ) - - def get_tuple(self, config: RunnableConfig) -> Optional[CheckpointTuple]: - """Get a checkpoint tuple from the database. - - This method retrieves a checkpoint tuple from the Postgres database based on the - provided config. If the config contains a "checkpoint_id" key, the checkpoint with - the matching thread ID and timestamp is retrieved. Otherwise, the latest checkpoint - for the given thread ID is retrieved. - - Args: - config (RunnableConfig): The config to use for retrieving the checkpoint. - - Returns: - Optional[CheckpointTuple]: The retrieved checkpoint tuple, or None if no matching checkpoint was found. - """ - return next(self.list(config), None) - - def put( - self, - config: RunnableConfig, - checkpoint: Checkpoint, - metadata: CheckpointMetadata, - new_versions: ChannelVersions, - ) -> RunnableConfig: - """Save a checkpoint to the database. - - This method saves a checkpoint to the Postgres database. The checkpoint is associated - with the provided config and its parent config (if any). - - Args: - config (RunnableConfig): The config to associate with the checkpoint. - checkpoint (Checkpoint): The checkpoint to save. - metadata (CheckpointMetadata): Additional metadata to save with the checkpoint. - new_versions (ChannelVersions): New channel versions as of this write. - - Returns: - RunnableConfig: Updated configuration after storing the checkpoint. 
- """ - configurable = config["configurable"] - thread_id: str = configurable["thread_id"] - checkpoint_id = get_checkpoint_id(config) - checkpoint_ns: str | None = configurable.get("checkpoint_ns") or "" - - checkpoint_copy = cast(dict[str, Any], checkpoint.copy()) - channel_values = checkpoint_copy.pop("channel_values", {}) - - next_config: RunnableConfig = { - "configurable": { - "thread_id": thread_id, - "checkpoint_ns": checkpoint_ns, - "checkpoint_id": checkpoint["id"], - } - } - - with self._lock, transaction.atomic(): - updated_checkpoint, _ = ConversationCheckpoint.objects.update_or_create( - id=checkpoint["id"], - thread_id=thread_id, - checkpoint_ns=checkpoint_ns, - defaults={ - "parent_checkpoint_id": checkpoint_id, - "checkpoint": self._dump_json({**checkpoint_copy, "pending_sends": []}), - "metadata": self._dump_json(metadata), - }, - ) - - blobs = [] - for channel, version in new_versions.items(): - type, blob = ( - self.serde.dumps_typed(channel_values[channel]) if channel in channel_values else ("empty", None) - ) - blobs.append( - ConversationCheckpointBlob( - checkpoint=updated_checkpoint, - thread_id=thread_id, - channel=channel, - version=str(version), - type=type, - blob=blob, - ) - ) - - ConversationCheckpointBlob.objects.bulk_create(blobs, ignore_conflicts=True) - return next_config - - def put_writes( - self, - config: RunnableConfig, - writes: Sequence[tuple[str, Any]], - task_id: str, - ) -> None: - """Store intermediate writes linked to a checkpoint. - - This method saves intermediate writes associated with a checkpoint to the Postgres database. - - Args: - config (RunnableConfig): Configuration of the related checkpoint. - writes (List[Tuple[str, Any]]): List of writes to store. - task_id (str): Identifier for the task creating the writes. - """ - configurable = config["configurable"] - thread_id: str = configurable["thread_id"] - checkpoint_id = get_checkpoint_id(config) - checkpoint_ns: str | None = configurable.get("checkpoint_ns") or "" - - with self._lock, transaction.atomic(): - # `put_writes` and `put` are concurrently called without guaranteeing the call order - # so we need to ensure the checkpoint is created before creating writes. - # Thread.lock() will prevent race conditions though to the same checkpoints within a single pod. 
- checkpoint, _ = ConversationCheckpoint.objects.get_or_create( - id=checkpoint_id, thread_id=thread_id, checkpoint_ns=checkpoint_ns - ) - - writes_to_create = [] - for idx, (channel, value) in enumerate(writes): - type, blob = self.serde.dumps_typed(value) - writes_to_create.append( - ConversationCheckpointWrite( - checkpoint=checkpoint, - task_id=task_id, - idx=idx, - channel=channel, - type=type, - blob=blob, - ) - ) - - ConversationCheckpointWrite.objects.bulk_create( - writes_to_create, - update_conflicts=all(w[0] in WRITES_IDX_MAP for w in writes), - unique_fields=["checkpoint", "task_id", "idx"], - update_fields=["channel", "type", "blob"], - ) - - def get_next_version(self, current: Optional[str | int], channel: ChannelProtocol) -> str: - if current is None: - current_v = 0 - elif isinstance(current, int): - current_v = current - else: - current_v = int(current.split(".")[0]) - next_v = current_v + 1 - next_h = random.random() - return f"{next_v:032}.{next_h:016}" diff --git a/ee/hogai/django_checkpoint/test/test_checkpointer.py b/ee/hogai/django_checkpoint/test/test_checkpointer.py deleted file mode 100644 index d7c7a91178..0000000000 --- a/ee/hogai/django_checkpoint/test/test_checkpointer.py +++ /dev/null @@ -1,425 +0,0 @@ -# type: ignore - -import operator -from typing import Annotated, Any, Optional, TypedDict - -from langchain_core.runnables import RunnableConfig -from langgraph.checkpoint.base import ( - Checkpoint, - CheckpointMetadata, - create_checkpoint, - empty_checkpoint, -) -from langgraph.checkpoint.base.id import uuid6 -from langgraph.errors import NodeInterrupt -from langgraph.graph import END, START -from langgraph.graph.state import CompiledStateGraph, StateGraph -from pydantic import BaseModel, Field - -from ee.hogai.django_checkpoint.checkpointer import DjangoCheckpointer -from ee.models.assistant import ( - Conversation, - ConversationCheckpoint, - ConversationCheckpointBlob, - ConversationCheckpointWrite, -) -from posthog.test.base import NonAtomicBaseTest - - -class TestDjangoCheckpointer(NonAtomicBaseTest): - CLASS_DATA_LEVEL_SETUP = False - - def _build_graph(self, checkpointer: DjangoCheckpointer): - class State(TypedDict): - val: int - - graph = StateGraph(State) - - def handle_node1(state: State) -> State: - if state["val"] == 1: - raise NodeInterrupt("test") - return {"val": state["val"] + 1} - - graph.add_node("node1", handle_node1) - graph.add_node("node2", lambda state: state) - - graph.add_edge(START, "node1") - graph.add_edge("node1", "node2") - graph.add_edge("node2", END) - - return graph.compile(checkpointer=checkpointer) - - def test_saver(self): - thread1 = Conversation.objects.create(user=self.user, team=self.team) - thread2 = Conversation.objects.create(user=self.user, team=self.team) - - config_1: RunnableConfig = { - "configurable": { - "thread_id": thread1.id, - "checkpoint_ns": "", - } - } - chkpnt_1: Checkpoint = empty_checkpoint() - - config_2: RunnableConfig = { - "configurable": { - "thread_id": thread2.id, - "checkpoint_ns": "", - } - } - chkpnt_2: Checkpoint = create_checkpoint(chkpnt_1, {}, 1) - - config_3: RunnableConfig = { - "configurable": { - "thread_id": thread2.id, - "checkpoint_id": chkpnt_2["id"], - "checkpoint_ns": "inner", - } - } - chkpnt_3: Checkpoint = empty_checkpoint() - - metadata_1: CheckpointMetadata = { - "source": "input", - "step": 2, - "writes": {}, - "score": 1, - } - metadata_2: CheckpointMetadata = { - "source": "loop", - "step": 1, - "writes": {"foo": "bar"}, - "score": None, - } - metadata_3: 
CheckpointMetadata = {} - - test_data = { - "configs": [config_1, config_2, config_3], - "checkpoints": [chkpnt_1, chkpnt_2, chkpnt_3], - "metadata": [metadata_1, metadata_2, metadata_3], - } - - saver = DjangoCheckpointer() - - configs = test_data["configs"] - checkpoints = test_data["checkpoints"] - metadata = test_data["metadata"] - - saver.put(configs[0], checkpoints[0], metadata[0], {}) - saver.put(configs[1], checkpoints[1], metadata[1], {}) - saver.put(configs[2], checkpoints[2], metadata[2], {}) - - # call method / assertions - query_1 = {"source": "input"} # search by 1 key - query_2 = { - "step": 1, - "writes": {"foo": "bar"}, - } # search by multiple keys - query_3: dict[str, Any] = {} # search by no keys, return all checkpoints - query_4 = {"source": "update", "step": 1} # no match - - search_results_1 = list(saver.list(None, filter=query_1)) - assert len(search_results_1) == 1 - assert search_results_1[0].metadata == metadata[0] - - search_results_2 = list(saver.list(None, filter=query_2)) - assert len(search_results_2) == 1 - assert search_results_2[0].metadata == metadata[1] - - search_results_3 = list(saver.list(None, filter=query_3)) - assert len(search_results_3) == 3 - - search_results_4 = list(saver.list(None, filter=query_4)) - assert len(search_results_4) == 0 - - # search by config (defaults to checkpoints across all namespaces) - search_results_5 = list(saver.list({"configurable": {"thread_id": thread2.id}})) - assert len(search_results_5) == 2 - assert { - search_results_5[0].config["configurable"]["checkpoint_ns"], - search_results_5[1].config["configurable"]["checkpoint_ns"], - } == {"", "inner"} - - def test_channel_versions(self): - thread1 = Conversation.objects.create(user=self.user, team=self.team) - - chkpnt = { - "v": 1, - "ts": "2024-07-31T20:14:19.804150+00:00", - "id": str(uuid6(clock_seq=-2)), - "channel_values": { - "post": "hog", - "node": "node", - }, - "channel_versions": { - "__start__": 2, - "my_key": 3, - "start:node": 3, - "node": 3, - }, - "versions_seen": { - "__input__": {}, - "__start__": {"__start__": 1}, - "node": {"start:node": 2}, - }, - "pending_sends": [], - } - metadata = {"meta": "key"} - - write_config = {"configurable": {"thread_id": thread1.id, "checkpoint_ns": ""}} - read_config = {"configurable": {"thread_id": thread1.id}} - - saver = DjangoCheckpointer() - saver.put(write_config, chkpnt, metadata, {}) - - checkpoint = ConversationCheckpoint.objects.first() - self.assertIsNotNone(checkpoint) - self.assertEqual(checkpoint.thread, thread1) - self.assertEqual(checkpoint.checkpoint_ns, "") - self.assertEqual(str(checkpoint.id), chkpnt["id"]) - self.assertIsNone(checkpoint.parent_checkpoint) - chkpnt.pop("channel_values") - self.assertEqual(checkpoint.checkpoint, chkpnt) - self.assertEqual(checkpoint.metadata, metadata) - - checkpoints = list(saver.list(read_config)) - self.assertEqual(len(checkpoints), 1) - - checkpoint = saver.get(read_config) - self.assertEqual(checkpoint, checkpoints[0].checkpoint) - - def test_put_copies_checkpoint(self): - thread1 = Conversation.objects.create(user=self.user, team=self.team) - chkpnt = { - "v": 1, - "ts": "2024-07-31T20:14:19.804150+00:00", - "id": str(uuid6(clock_seq=-2)), - "channel_values": { - "post": "hog", - "node": "node", - }, - "channel_versions": { - "__start__": 2, - "my_key": 3, - "start:node": 3, - "node": 3, - }, - "versions_seen": { - "__input__": {}, - "__start__": {"__start__": 1}, - "node": {"start:node": 2}, - }, - "pending_sends": [], - } - metadata = {"meta": "key"} - 
write_config = {"configurable": {"thread_id": thread1.id, "checkpoint_ns": ""}} - saver = DjangoCheckpointer() - saver.put(write_config, chkpnt, metadata, {}) - self.assertIn("channel_values", chkpnt) - - def test_concurrent_puts_and_put_writes(self): - graph: CompiledStateGraph = self._build_graph(DjangoCheckpointer()) - thread = Conversation.objects.create(user=self.user, team=self.team) - config = {"configurable": {"thread_id": str(thread.id)}} - graph.invoke( - {"val": 0}, - config=config, - ) - self.assertEqual(len(ConversationCheckpoint.objects.all()), 4) - self.assertEqual(len(ConversationCheckpointBlob.objects.all()), 10) - self.assertEqual(len(ConversationCheckpointWrite.objects.all()), 6) - - def test_resuming(self): - checkpointer = DjangoCheckpointer() - graph: CompiledStateGraph = self._build_graph(checkpointer) - thread = Conversation.objects.create(user=self.user, team=self.team) - config = {"configurable": {"thread_id": str(thread.id)}} - - graph.invoke( - {"val": 1}, - config=config, - ) - snapshot = graph.get_state(config) - self.assertIsNotNone(snapshot.next) - self.assertEqual(snapshot.tasks[0].interrupts[0].value, "test") - - self.assertEqual(len(ConversationCheckpoint.objects.all()), 2) - self.assertEqual(len(ConversationCheckpointBlob.objects.all()), 4) - self.assertEqual(len(ConversationCheckpointWrite.objects.all()), 3) - self.assertEqual(len(list(checkpointer.list(config))), 2) - - latest_checkpoint = ConversationCheckpoint.objects.last() - latest_write = ConversationCheckpointWrite.objects.filter(checkpoint=latest_checkpoint).first() - actual_checkpoint = checkpointer.get_tuple(config) - self.assertIsNotNone(actual_checkpoint) - self.assertIsNotNone(latest_write) - self.assertEqual(len(latest_checkpoint.writes.all()), 1) - blobs = list(latest_checkpoint.blobs.all()) - self.assertEqual(len(blobs), 3) - self.assertEqual(actual_checkpoint.checkpoint["id"], str(latest_checkpoint.id)) - self.assertEqual(len(actual_checkpoint.pending_writes), 1) - self.assertEqual(actual_checkpoint.pending_writes[0][0], str(latest_write.task_id)) - - graph.update_state(config, {"val": 2}) - # add the value update checkpoint - self.assertEqual(len(ConversationCheckpoint.objects.all()), 3) - self.assertEqual(len(ConversationCheckpointBlob.objects.all()), 6) - self.assertEqual(len(ConversationCheckpointWrite.objects.all()), 5) - self.assertEqual(len(list(checkpointer.list(config))), 3) - - res = graph.invoke(None, config=config) - self.assertEqual(len(ConversationCheckpoint.objects.all()), 5) - self.assertEqual(len(ConversationCheckpointBlob.objects.all()), 12) - self.assertEqual(len(ConversationCheckpointWrite.objects.all()), 9) - self.assertEqual(len(list(checkpointer.list(config))), 5) - self.assertEqual(res, {"val": 3}) - snapshot = graph.get_state(config) - self.assertFalse(snapshot.next) - - def test_checkpoint_blobs_are_bound_to_thread(self): - class State(TypedDict, total=False): - messages: Annotated[list[str], operator.add] - string: Optional[str] - - graph = StateGraph(State) - - def handle_node1(state: State): - return - - def handle_node2(state: State): - raise NodeInterrupt("test") - - graph.add_node("node1", handle_node1) - graph.add_node("node2", handle_node2) - - graph.add_edge(START, "node1") - graph.add_edge("node1", "node2") - graph.add_edge("node2", END) - - compiled = graph.compile(checkpointer=DjangoCheckpointer()) - - thread = Conversation.objects.create(user=self.user, team=self.team) - config = {"configurable": {"thread_id": str(thread.id)}} - 
compiled.invoke({"messages": ["hello"], "string": "world"}, config=config) - - snapshot = compiled.get_state(config) - self.assertIsNotNone(snapshot.next) - self.assertEqual(snapshot.tasks[0].interrupts[0].value, "test") - saved_state = snapshot.values - self.assertEqual(saved_state["messages"], ["hello"]) - self.assertEqual(saved_state["string"], "world") - - def test_checkpoint_can_save_and_load_pydantic_state(self): - class State(BaseModel): - messages: Annotated[list[str], operator.add] - string: Optional[str] - - class PartialState(BaseModel): - messages: Optional[list[str]] = Field(default=None) - string: Optional[str] = Field(default=None) - - graph = StateGraph(State) - - def handle_node1(state: State): - return PartialState() - - def handle_node2(state: State): - raise NodeInterrupt("test") - - graph.add_node("node1", handle_node1) - graph.add_node("node2", handle_node2) - - graph.add_edge(START, "node1") - graph.add_edge("node1", "node2") - graph.add_edge("node2", END) - - compiled = graph.compile(checkpointer=DjangoCheckpointer()) - - thread = Conversation.objects.create(user=self.user, team=self.team) - config = {"configurable": {"thread_id": str(thread.id)}} - compiled.invoke({"messages": ["hello"], "string": "world"}, config=config) - - snapshot = compiled.get_state(config) - self.assertIsNotNone(snapshot.next) - self.assertEqual(snapshot.tasks[0].interrupts[0].value, "test") - saved_state = snapshot.values - self.assertEqual(saved_state["messages"], ["hello"]) - self.assertEqual(saved_state["string"], "world") - - def test_saved_blobs(self): - class State(TypedDict, total=False): - messages: Annotated[list[str], operator.add] - - graph = StateGraph(State) - - def handle_node1(state: State): - return {"messages": ["world"]} - - graph.add_node("node1", handle_node1) - - graph.add_edge(START, "node1") - graph.add_edge("node1", END) - - checkpointer = DjangoCheckpointer() - compiled = graph.compile(checkpointer=checkpointer) - - thread = Conversation.objects.create(user=self.user, team=self.team) - config = {"configurable": {"thread_id": str(thread.id)}} - compiled.invoke({"messages": ["hello"]}, config=config) - - snapshot = compiled.get_state(config) - self.assertFalse(snapshot.next) - saved_state = snapshot.values - self.assertEqual(saved_state["messages"], ["hello", "world"]) - - blobs = list(ConversationCheckpointBlob.objects.filter(thread=thread)) - self.assertEqual(len(blobs), 7) - - # Set initial state - self.assertEqual(blobs[0].channel, "__start__") - self.assertEqual(blobs[0].type, "msgpack") - self.assertEqual( - checkpointer.serde.loads_typed((blobs[0].type, blobs[0].blob)), - {"messages": ["hello"]}, - ) - - # Set first node - self.assertEqual(blobs[1].channel, "__start__") - self.assertEqual(blobs[1].type, "empty") - self.assertIsNone(blobs[1].blob) - - # Set value channels before start - self.assertEqual(blobs[2].channel, "messages") - self.assertEqual(blobs[2].type, "msgpack") - self.assertEqual( - checkpointer.serde.loads_typed((blobs[2].type, blobs[2].blob)), - ["hello"], - ) - - # Transition to node1 - self.assertEqual(blobs[3].channel, "start:node1") - self.assertEqual(blobs[3].type, "msgpack") - self.assertEqual( - checkpointer.serde.loads_typed((blobs[3].type, blobs[3].blob)), - "__start__", - ) - - # Set new state for messages - self.assertEqual(blobs[4].channel, "messages") - self.assertEqual(blobs[4].type, "msgpack") - self.assertEqual( - checkpointer.serde.loads_typed((blobs[4].type, blobs[4].blob)), - ["hello", "world"], - ) - - # After setting a 
state - self.assertEqual(blobs[5].channel, "start:node1") - self.assertEqual(blobs[5].type, "empty") - self.assertIsNone(blobs[5].blob) - - # Set last step - self.assertEqual(blobs[6].channel, "node1") - self.assertEqual(blobs[6].type, "msgpack") - self.assertEqual( - checkpointer.serde.loads_typed((blobs[6].type, blobs[6].blob)), - "node1", - ) diff --git a/ee/hogai/eval/__init__.py b/ee/hogai/eval/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/eval/conftest.py b/ee/hogai/eval/conftest.py deleted file mode 100644 index 56606dab4a..0000000000 --- a/ee/hogai/eval/conftest.py +++ /dev/null @@ -1,134 +0,0 @@ -import functools -from collections.abc import Generator -from pathlib import Path - -import pytest -from django.conf import settings -from django.test import override_settings -from langchain_core.runnables import RunnableConfig - -from ee.models import Conversation -from ee.models.assistant import CoreMemory -from posthog.demo.matrix.manager import MatrixManager -from posthog.models import Organization, Project, Team, User -from posthog.tasks.demo_create_data import HedgeboxMatrix -from posthog.test.base import BaseTest - - -# Flaky is a handy tool, but it always runs setup fixtures for retries. -# This decorator will just retry without re-running setup. -def retry_test_only(max_retries=3): - def decorator(func): - @functools.wraps(func) - def wrapper(*args, **kwargs): - last_error: Exception | None = None - for attempt in range(max_retries): - try: - return func(*args, **kwargs) - except Exception as e: - last_error = e - print(f"\nRetrying test (attempt {attempt + 1}/{max_retries})...") # noqa - if last_error: - raise last_error - - return wrapper - - return decorator - - -# Apply decorators to all tests in the package. -def pytest_collection_modifyitems(items): - current_dir = Path(__file__).parent - for item in items: - if Path(item.fspath).is_relative_to(current_dir): - item.add_marker( - pytest.mark.skipif(not settings.IN_EVAL_TESTING, reason="Only runs for the assistant evaluation") - ) - # Apply our custom retry decorator to the test function - item.obj = retry_test_only(max_retries=3)(item.obj) - - -@pytest.fixture(scope="package") -def team(django_db_blocker) -> Generator[Team, None, None]: - with django_db_blocker.unblock(): - organization = Organization.objects.create(name=BaseTest.CONFIG_ORGANIZATION_NAME) - project = Project.objects.create(id=Team.objects.increment_id_sequence(), organization=organization) - team = Team.objects.create( - id=project.id, - project=project, - organization=organization, - test_account_filters=[ - { - "key": "email", - "value": "@posthog.com", - "operator": "not_icontains", - "type": "person", - } - ], - has_completed_onboarding_for={"product_analytics": True}, - ) - yield team - organization.delete() - - -@pytest.fixture(scope="package") -def user(team, django_db_blocker) -> Generator[User, None, None]: - with django_db_blocker.unblock(): - user = User.objects.create_and_join(team.organization, "eval@posthog.com", "password1234") - yield user - user.delete() - - -@pytest.fixture(scope="package") -def core_memory(team) -> Generator[CoreMemory, None, None]: - initial_memory = """Hedgebox is a cloud storage service enabling users to store, share, and access files across devices. - - The company operates in the cloud storage and collaboration market for individuals and businesses. - - Their audience includes professionals and organizations seeking file management and collaboration solutions. 
- - Hedgebox’s freemium model provides free accounts with limited storage and paid subscription plans for additional features. - - Core features include file storage, synchronization, sharing, and collaboration tools for seamless file access and sharing. - - It integrates with third-party applications to enhance functionality and streamline workflows. - - Hedgebox sponsors the YouTube channel Marius Tech Tips.""" - - core_memory = CoreMemory.objects.create( - team=team, - text=initial_memory, - initial_text=initial_memory, - scraping_status=CoreMemory.ScrapingStatus.COMPLETED, - ) - yield core_memory - core_memory.delete() - - -@pytest.mark.django_db(transaction=True) -@pytest.fixture -def runnable_config(team, user) -> Generator[RunnableConfig, None, None]: - conversation = Conversation.objects.create(team=team, user=user) - yield { - "configurable": { - "thread_id": conversation.id, - } - } - conversation.delete() - - -@pytest.fixture(scope="package", autouse=True) -def setup_test_data(django_db_setup, team, user, django_db_blocker): - with django_db_blocker.unblock(): - matrix = HedgeboxMatrix( - seed="b1ef3c66-5f43-488a-98be-6b46d92fbcef", # this seed generates all events - days_past=120, - days_future=30, - n_clusters=500, - group_type_index_offset=0, - ) - matrix_manager = MatrixManager(matrix, print_steps=True) - with override_settings(TEST=False): - # Simulation saving should occur in non-test mode, so that Kafka isn't mocked. Normally in tests we don't - # want to ingest via Kafka, but simulation saving is specifically designed to use that route for speed - matrix_manager.run_on_team(team, user) diff --git a/ee/hogai/eval/tests/test_eval_funnel_generator.py b/ee/hogai/eval/tests/test_eval_funnel_generator.py deleted file mode 100644 index 5f0f292432..0000000000 --- a/ee/hogai/eval/tests/test_eval_funnel_generator.py +++ /dev/null @@ -1,46 +0,0 @@ -from collections.abc import Callable -from typing import cast - -import pytest -from langgraph.graph.state import CompiledStateGraph - -from ee.hogai.assistant import AssistantGraph -from ee.hogai.utils.types import AssistantNodeName, AssistantState -from posthog.schema import AssistantFunnelsQuery, HumanMessage, VisualizationMessage - - -@pytest.fixture -def call_node(team, runnable_config) -> Callable[[str, str], AssistantFunnelsQuery]: - graph: CompiledStateGraph = ( - AssistantGraph(team) - .add_edge(AssistantNodeName.START, AssistantNodeName.FUNNEL_GENERATOR) - .add_funnel_generator(AssistantNodeName.END) - .compile() - ) - - def callable(query: str, plan: str) -> AssistantFunnelsQuery: - state = graph.invoke( - AssistantState(messages=[HumanMessage(content=query)], plan=plan), - runnable_config, - ) - return cast(VisualizationMessage, AssistantState.model_validate(state).messages[-1]).answer - - return callable - - -def test_node_replaces_equals_with_contains(call_node): - query = "what is the conversion rate from a page view to sign up for users with name John?" - plan = """Sequence: - 1. $pageview - - property filter 1 - - person - - name - - equals - - John - 2. 
signed_up - """ - actual_output = call_node(query, plan).model_dump_json(exclude_none=True) - assert "exact" not in actual_output - assert "icontains" in actual_output - assert "John" not in actual_output - assert "john" in actual_output diff --git a/ee/hogai/eval/tests/test_eval_funnel_planner.py b/ee/hogai/eval/tests/test_eval_funnel_planner.py deleted file mode 100644 index c8bc25bc0b..0000000000 --- a/ee/hogai/eval/tests/test_eval_funnel_planner.py +++ /dev/null @@ -1,224 +0,0 @@ -from collections.abc import Callable - -import pytest -from deepeval import assert_test -from deepeval.metrics import GEval -from deepeval.test_case import LLMTestCase, LLMTestCaseParams -from langchain_core.runnables.config import RunnableConfig -from langgraph.graph.state import CompiledStateGraph - -from ee.hogai.assistant import AssistantGraph -from ee.hogai.utils.types import AssistantNodeName, AssistantState -from posthog.schema import HumanMessage - - -@pytest.fixture(scope="module") -def metric(): - return GEval( - name="Funnel Plan Correctness", - criteria="You will be given expected and actual generated plans to provide a taxonomy to answer a user's question with a funnel insight. Compare the plans to determine whether the taxonomy of the actual plan matches the expected plan. Do not apply general knowledge about funnel insights.", - evaluation_steps=[ - "A plan must define at least two series in the sequence, but it is not required to define any filters, exclusion steps, or a breakdown.", - "Compare events, properties, math types, and property values of 'expected output' and 'actual output'. Do not penalize if the actual output does not include a timeframe.", - "Check if the combination of events, properties, and property values in 'actual output' can answer the user's question according to the 'expected output'.", - # The criteria for aggregations must be more specific because there isn't a way to bypass them. - "Check if the math types in 'actual output' match those in 'expected output.' If the aggregation type is specified by a property, user, or group in 'expected output', the same property, user, or group must be used in 'actual output'.", - "If 'expected output' contains exclusion steps, check if 'actual output' contains those, and heavily penalize if the exclusion steps are not present or different.", - "If 'expected output' contains a breakdown, check if 'actual output' contains a similar breakdown, and heavily penalize if the breakdown is not present or different. Plans may only have one breakdown.", - # We don't want to see in the output unnecessary property filters. The assistant tries to use them all the time. - "Heavily penalize if the 'actual output' contains any excessive output not present in the 'expected output'. 
For example, the `is set` operator in filters should not be used unless the user explicitly asks for it.", - ], - evaluation_params=[ - LLMTestCaseParams.INPUT, - LLMTestCaseParams.EXPECTED_OUTPUT, - LLMTestCaseParams.ACTUAL_OUTPUT, - ], - threshold=0.7, - ) - - -@pytest.fixture -def call_node(team, runnable_config: RunnableConfig) -> Callable[[str], str]: - graph: CompiledStateGraph = ( - AssistantGraph(team) - .add_edge(AssistantNodeName.START, AssistantNodeName.FUNNEL_PLANNER) - .add_funnel_planner(AssistantNodeName.END) - .compile() - ) - - def callable(query: str) -> str: - state = graph.invoke( - AssistantState(messages=[HumanMessage(content=query)]), - runnable_config, - ) - return AssistantState.model_validate(state).plan or "" - - return callable - - -def test_basic_funnel(metric, call_node): - query = "what was the conversion from a page view to sign up?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Sequence: - 1. $pageview - 2. signed_up - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) - - -def test_outputs_at_least_two_events(metric, call_node): - """ - Ambigious query. The funnel must return at least two events. - """ - query = "how many users paid a bill?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Sequence: - 1. any event - 2. upgrade_plan - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) - - -def test_no_excessive_property_filters(metric, call_node): - query = "Show the user conversion from a sign up to a file download" - test_case = LLMTestCase( - input=query, - expected_output=""" - Sequence: - 1. signed_up - 2. downloaded_file - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) - - -def test_basic_filtering(metric, call_node): - query = "What was the conversion from uploading a file to downloading it from Chrome and Safari in the last 30d?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Sequence: - 1. uploaded_file - - property filter 1: - - entity: event - - property name: $browser - - property type: String - - operator: equals - - property value: Chrome - - property filter 2: - - entity: event - - property name: $browser - - property type: String - - operator: equals - - property value: Safari - 2. downloaded_file - - property filter 1: - - entity: event - - property name: $browser - - property type: String - - operator: equals - - property value: Chrome - - property filter 2: - - entity: event - - property name: $browser - - property type: String - - operator: equals - - property value: Safari - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) - - -def test_exclusion_steps(metric, call_node): - query = "What was the conversion from uploading a file to downloading it in the last 30d excluding users that deleted a file?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Sequence: - 1. uploaded_file - 2. downloaded_file - - Exclusions: - - deleted_file - - start index: 0 - - end index: 1 - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) - - -def test_breakdown(metric, call_node): - query = "Show a conversion from uploading a file to downloading it segmented by a browser" - test_case = LLMTestCase( - input=query, - expected_output=""" - Sequence: - 1. uploaded_file - 2. 
downloaded_file - - Breakdown by: - - entity: event - - property name: $browser - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) - - -def test_needle_in_a_haystack(metric, call_node): - query = "What was the conversion from a sign up to a paying customer on the personal-pro plan?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Sequence: - 1. signed_up - 2. paid_bill - - property filter 1: - - entity: event - - property name: plan - - property type: String - - operator: equals - - property value: personal/pro - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) - - -def test_planner_outputs_multiple_series_from_a_single_series_question(metric, call_node): - query = "What's our sign-up funnel?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Sequence: - 1. $pageview - 2. signed_up - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) - - -def test_funnel_does_not_include_timeframe(metric, call_node): - query = "what was the conversion from a page view to sign up for event time before 2024-01-01?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Sequence: - 1. $pageview - 2. signed_up - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) diff --git a/ee/hogai/eval/tests/test_eval_memory.py b/ee/hogai/eval/tests/test_eval_memory.py deleted file mode 100644 index 54329f6d5e..0000000000 --- a/ee/hogai/eval/tests/test_eval_memory.py +++ /dev/null @@ -1,178 +0,0 @@ -import json -from collections.abc import Callable -from typing import Optional - -import pytest -from deepeval import assert_test -from deepeval.metrics import GEval, ToolCorrectnessMetric -from deepeval.test_case import LLMTestCase, LLMTestCaseParams -from langchain_core.messages import AIMessage -from langchain_core.runnables.config import RunnableConfig -from langgraph.graph.state import CompiledStateGraph - -from ee.hogai.assistant import AssistantGraph -from ee.hogai.utils.types import AssistantNodeName, AssistantState -from posthog.schema import HumanMessage - - -@pytest.fixture -def retrieval_metrics(): - retrieval_correctness_metric = GEval( - name="Correctness", - criteria="Determine whether the actual output is factually correct based on the expected output.", - evaluation_steps=[ - "Check whether the facts in 'actual output' contradicts any facts in 'expected output'", - "You should also heavily penalize omission of detail", - "Vague language, or contradicting OPINIONS, are OK", - "The actual fact must only contain information about the user's company or product", - "Context must not contain similar information to the actual fact", - ], - evaluation_params=[ - LLMTestCaseParams.INPUT, - LLMTestCaseParams.CONTEXT, - LLMTestCaseParams.EXPECTED_OUTPUT, - LLMTestCaseParams.ACTUAL_OUTPUT, - ], - threshold=0.7, - ) - - return [ToolCorrectnessMetric(), retrieval_correctness_metric] - - -@pytest.fixture -def replace_metrics(): - retrieval_correctness_metric = GEval( - name="Correctness", - criteria="Determine whether the actual output tuple is factually correct based on the expected output tuple. 
The first element is the original fact from the context to replace with, while the second element is the new fact to replace it with.", - evaluation_steps=[ - "Check whether the facts in 'actual output' contradicts any facts in 'expected output'", - "You should also heavily penalize omission of detail", - "Vague language, or contradicting OPINIONS, are OK", - "The actual fact must only contain information about the user's company or product", - "Context must contain the first element of the tuples", - "For deletion, the second element should be an empty string in both the actual and expected output", - ], - evaluation_params=[ - LLMTestCaseParams.INPUT, - LLMTestCaseParams.CONTEXT, - LLMTestCaseParams.EXPECTED_OUTPUT, - LLMTestCaseParams.ACTUAL_OUTPUT, - ], - threshold=0.7, - ) - - return [ToolCorrectnessMetric(), retrieval_correctness_metric] - - -@pytest.fixture -def call_node(team, runnable_config: RunnableConfig) -> Callable[[str], Optional[AIMessage]]: - graph: CompiledStateGraph = ( - AssistantGraph(team).add_memory_collector(AssistantNodeName.END, AssistantNodeName.END).compile() - ) - - def callable(query: str) -> Optional[AIMessage]: - state = graph.invoke( - AssistantState(messages=[HumanMessage(content=query)]), - runnable_config, - ) - validated_state = AssistantState.model_validate(state) - if not validated_state.memory_collection_messages: - return None - return validated_state.memory_collection_messages[-1] - - return callable - - -def test_saves_relevant_fact(call_node, retrieval_metrics, core_memory): - query = "calculate ARR: use the paid_bill event and the amount property." - actual_output = call_node(query) - tool = actual_output.tool_calls[0] - - test_case = LLMTestCase( - input=query, - expected_output="The product uses the event paid_bill and the property amount to calculate Annual Recurring Revenue (ARR).", - expected_tools=["core_memory_append"], - context=[core_memory.formatted_text], - actual_output=tool["args"]["memory_content"], - tools_called=[tool["name"]], - ) - assert_test(test_case, retrieval_metrics) - - -def test_saves_company_related_information(call_node, retrieval_metrics, core_memory): - query = "Our secondary target audience is technical founders or highly-technical product managers." - actual_output = call_node(query) - tool = actual_output.tool_calls[0] - - test_case = LLMTestCase( - input=query, - expected_output="The company's secondary target audience is technical founders or highly-technical product managers.", - expected_tools=["core_memory_append"], - context=[core_memory.formatted_text], - actual_output=tool["args"]["memory_content"], - tools_called=[tool["name"]], - ) - assert_test(test_case, retrieval_metrics) - - -def test_omits_irrelevant_personal_information(call_node): - query = "My name is John Doherty." - actual_output = call_node(query) - assert actual_output is None - - -def test_omits_irrelevant_excessive_info_from_insights(call_node): - query = "Build a pageview trend for users with name John." - actual_output = call_node(query) - assert actual_output is None - - -def test_fact_replacement(call_node, core_memory, replace_metrics): - query = "Hedgebox doesn't sponsor the YouTube channel Marius Tech Tips anymore." 
- actual_output = call_node(query) - tool = actual_output.tool_calls[0] - - test_case = LLMTestCase( - input=query, - expected_output=json.dumps( - [ - "Hedgebox sponsors the YouTube channel Marius Tech Tips.", - "Hedgebox no longer sponsors the YouTube channel Marius Tech Tips.", - ] - ), - expected_tools=["core_memory_replace"], - context=[core_memory.formatted_text], - actual_output=json.dumps([tool["args"]["original_fragment"], tool["args"]["new_fragment"]]), - tools_called=[tool["name"]], - ) - assert_test(test_case, replace_metrics) - - -def test_fact_removal(call_node, core_memory, replace_metrics): - query = "Delete info that Hedgebox sponsored the YouTube channel Marius Tech Tips." - actual_output = call_node(query) - tool = actual_output.tool_calls[0] - - test_case = LLMTestCase( - input=query, - expected_output=json.dumps(["Hedgebox sponsors the YouTube channel Marius Tech Tips.", ""]), - expected_tools=["core_memory_replace"], - context=[core_memory.formatted_text], - actual_output=json.dumps([tool["args"]["original_fragment"], tool["args"]["new_fragment"]]), - tools_called=[tool["name"]], - ) - assert_test(test_case, replace_metrics) - - -def test_parallel_calls(call_node): - query = "Delete info that Hedgebox sponsored the YouTube channel Marius Tech Tips, and we don't have file sharing." - actual_output = call_node(query) - - tool = actual_output.tool_calls - test_case = LLMTestCase( - input=query, - expected_tools=["core_memory_replace", "core_memory_append"], - actual_output=actual_output.content, - tools_called=[tool[0]["name"], tool[1]["name"]], - ) - assert_test(test_case, [ToolCorrectnessMetric()]) diff --git a/ee/hogai/eval/tests/test_eval_retention_generator.py b/ee/hogai/eval/tests/test_eval_retention_generator.py deleted file mode 100644 index 409a2d5883..0000000000 --- a/ee/hogai/eval/tests/test_eval_retention_generator.py +++ /dev/null @@ -1,76 +0,0 @@ -from collections.abc import Callable -from typing import cast - -import pytest -from langgraph.graph.state import CompiledStateGraph - -from ee.hogai.assistant import AssistantGraph -from ee.hogai.utils.types import AssistantNodeName, AssistantState -from posthog.schema import ( - AssistantRetentionQuery, - HumanMessage, - RetentionEntity, - VisualizationMessage, -) - - -@pytest.fixture -def call_node(team, runnable_config) -> Callable[[str, str], AssistantRetentionQuery]: - graph: CompiledStateGraph = ( - AssistantGraph(team) - .add_edge(AssistantNodeName.START, AssistantNodeName.RETENTION_GENERATOR) - .add_retention_generator(AssistantNodeName.END) - .compile() - ) - - def callable(query: str, plan: str) -> AssistantRetentionQuery: - state = graph.invoke( - AssistantState(messages=[HumanMessage(content=query)], plan=plan), - runnable_config, - ) - message = cast(VisualizationMessage, AssistantState.model_validate(state).messages[-1]) - answer = message.answer - assert isinstance(answer, AssistantRetentionQuery), "Expected AssistantRetentionQuery" - return answer - - return callable - - -def test_node_replaces_equals_with_contains(call_node): - query = "Show file upload retention after signup for users with name John" - plan = """Target event: - - signed_up - - Returning event: - - file_uploaded - - Filters: - - property filter 1: - - person - - name - - equals - - John - """ - actual_output = call_node(query, plan).model_dump_json(exclude_none=True) - assert "exact" not in actual_output - assert "icontains" in actual_output - assert "John" not in actual_output - assert "john" in actual_output - - -def 
test_basic_retention_structure(call_node): - query = "Show retention for users who signed up" - plan = """Target Event: - - signed_up - - Returning Event: - - file_uploaded - """ - actual_output = call_node(query, plan) - assert actual_output.retentionFilter is not None - assert actual_output.retentionFilter.targetEntity == RetentionEntity( - id="signed_up", type="events", name="signed_up", order=0 - ) - assert actual_output.retentionFilter.returningEntity == RetentionEntity( - id="file_uploaded", type="events", name="file_uploaded", order=0 - ) diff --git a/ee/hogai/eval/tests/test_eval_retention_planner.py b/ee/hogai/eval/tests/test_eval_retention_planner.py deleted file mode 100644 index b050fbea41..0000000000 --- a/ee/hogai/eval/tests/test_eval_retention_planner.py +++ /dev/null @@ -1,118 +0,0 @@ -from collections.abc import Callable - -import pytest -from deepeval import assert_test -from deepeval.metrics import GEval -from deepeval.test_case import LLMTestCase, LLMTestCaseParams -from langchain_core.runnables.config import RunnableConfig -from langgraph.graph.state import CompiledStateGraph - -from ee.hogai.assistant import AssistantGraph -from ee.hogai.utils.types import AssistantNodeName, AssistantState -from posthog.schema import HumanMessage - - -@pytest.fixture(scope="module") -def metric(): - return GEval( - name="Retention Plan Correctness", - criteria="You will be given expected and actual generated plans to provide a taxonomy to answer a user's question with a retention insight. Compare the plans to determine whether the taxonomy of the actual plan matches the expected plan. Do not apply general knowledge about retention insights.", - evaluation_steps=[ - "A plan must define both a target event (cohort-defining event) and a returning event (retention-measuring event), but it is not required to define any filters. It can't have breakdowns.", - "Compare target event, returning event, properties, and property values of 'expected output' and 'actual output'. Do not penalize if the actual output does not include a timeframe.", - "Check if the combination of target events, returning events, properties, and property values in 'actual output' can answer the user's question according to the 'expected output'.", - "If 'expected output' contains a breakdown, check if 'actual output' contains a similar breakdown, and heavily penalize if the breakdown is not present or different.", - # We don't want to see in the output unnecessary property filters. The assistant tries to use them all the time. - "Heavily penalize if the 'actual output' contains any excessive output not present in the 'expected output'. 
For example, the `is set` operator in filters should not be used unless the user explicitly asks for it.", - ], - evaluation_params=[ - LLMTestCaseParams.INPUT, - LLMTestCaseParams.EXPECTED_OUTPUT, - LLMTestCaseParams.ACTUAL_OUTPUT, - ], - threshold=0.7, - ) - - -@pytest.fixture -def call_node(team, runnable_config: RunnableConfig) -> Callable[[str], str]: - graph: CompiledStateGraph = ( - AssistantGraph(team) - .add_edge(AssistantNodeName.START, AssistantNodeName.RETENTION_PLANNER) - .add_retention_planner(AssistantNodeName.END) - .compile() - ) - - def callable(query: str) -> str: - raw_state = graph.invoke( - AssistantState(messages=[HumanMessage(content=query)]), - runnable_config, - ) - state = AssistantState.model_validate(raw_state) - return state.plan or "NO PLAN WAS GENERATED" - - return callable - - -def test_basic_retention(metric, call_node): - query = "What's the file upload retention of new users?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Target event: - - signed_up - - Returning event: - - uploaded_file - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) - - -def test_basic_filtering(metric, call_node): - query = "Show retention of Chrome users uploading files" - test_case = LLMTestCase( - input=query, - expected_output=""" - Target event: - - uploaded_file - - Returning event: - - uploaded_file - - Filters: - - property filter 1: - - entity: event - - property name: $browser - - property type: String - - operator: equals - - property value: Chrome - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) - - -def test_needle_in_a_haystack(metric, call_node): - query = "Show retention for users who have paid a bill and are on the personal/pro plan" - test_case = LLMTestCase( - input=query, - expected_output=""" - Target event: - - paid_bill - - Returning event: - - downloaded_file - - Filters: - - property filter 1: - - entity: account - - property name: plan - - property type: String - - operator: equals - - property value: personal/pro - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) diff --git a/ee/hogai/eval/tests/test_eval_router.py b/ee/hogai/eval/tests/test_eval_router.py deleted file mode 100644 index 7c4a3325ea..0000000000 --- a/ee/hogai/eval/tests/test_eval_router.py +++ /dev/null @@ -1,80 +0,0 @@ -from collections.abc import Callable -from typing import cast - -import pytest -from langgraph.graph.state import CompiledStateGraph - -from ee.hogai.assistant import AssistantGraph -from ee.hogai.utils.types import AssistantNodeName, AssistantState -from posthog.schema import HumanMessage, RouterMessage - - -@pytest.fixture -def call_node(team, runnable_config) -> Callable[[str | list], str]: - graph: CompiledStateGraph = ( - AssistantGraph(team) - .add_edge(AssistantNodeName.START, AssistantNodeName.ROUTER) - .add_router(path_map={"trends": AssistantNodeName.END, "funnel": AssistantNodeName.END}) - .compile() - ) - - def callable(query: str | list) -> str: - messages = [HumanMessage(content=query)] if isinstance(query, str) else query - state = graph.invoke( - AssistantState(messages=messages), - runnable_config, - ) - return cast(RouterMessage, AssistantState.model_validate(state).messages[-1]).content - - return callable - - -def test_outputs_basic_trends_insight(call_node): - query = "Show the $pageview trend" - res = call_node(query) - assert res == "trends" - - -def test_outputs_basic_funnel_insight(call_node): - query = "What is the conversion rate of users 
who uploaded a file to users who paid for a plan?" - res = call_node(query) - assert res == "funnel" - - -def test_converts_trends_to_funnel(call_node): - conversation = [ - HumanMessage(content="Show trends of $pageview and $identify"), - RouterMessage(content="trends"), - HumanMessage(content="Convert this insight to a funnel"), - ] - res = call_node(conversation[:1]) - assert res == "trends" - res = call_node(conversation) - assert res == "funnel" - - -def test_converts_funnel_to_trends(call_node): - conversation = [ - HumanMessage(content="What is the conversion from a page view to a sign up?"), - RouterMessage(content="funnel"), - HumanMessage(content="Convert this insight to a trends"), - ] - res = call_node(conversation[:1]) - assert res == "funnel" - res = call_node(conversation) - assert res == "trends" - - -def test_outputs_single_trends_insight(call_node): - """ - Must display a trends insight because it's not possible to build a funnel with a single series. - """ - query = "how many users upgraded their plan to personal pro?" - res = call_node(query) - assert res == "trends" - - -def test_classifies_funnel_with_single_series(call_node): - query = "What's our sign-up funnel?" - res = call_node(query) - assert res == "funnel" diff --git a/ee/hogai/eval/tests/test_eval_trends_generator.py b/ee/hogai/eval/tests/test_eval_trends_generator.py deleted file mode 100644 index c8491957c8..0000000000 --- a/ee/hogai/eval/tests/test_eval_trends_generator.py +++ /dev/null @@ -1,65 +0,0 @@ -from collections.abc import Callable -from typing import cast - -import pytest -from langgraph.graph.state import CompiledStateGraph - -from ee.hogai.assistant import AssistantGraph -from ee.hogai.utils.types import AssistantNodeName, AssistantState -from posthog.schema import AssistantTrendsQuery, HumanMessage, VisualizationMessage - - -@pytest.fixture -def call_node(team, runnable_config) -> Callable[[str, str], AssistantTrendsQuery]: - graph: CompiledStateGraph = ( - AssistantGraph(team) - .add_edge(AssistantNodeName.START, AssistantNodeName.TRENDS_GENERATOR) - .add_trends_generator(AssistantNodeName.END) - .compile() - ) - - def callable(query: str, plan: str) -> AssistantTrendsQuery: - state = graph.invoke( - AssistantState(messages=[HumanMessage(content=query)], plan=plan), - runnable_config, - ) - return cast(VisualizationMessage, AssistantState.model_validate(state).messages[-1]).answer - - return callable - - -def test_node_replaces_equals_with_contains(call_node): - query = "what is pageview trend for users with name John?" - plan = """Events: - - $pageview - - math operation: total count - - property filter 1 - - person - - name - - equals - - John - """ - actual_output = call_node(query, plan).model_dump_json(exclude_none=True) - assert "exact" not in actual_output - assert "icontains" in actual_output - assert "John" not in actual_output - assert "john" in actual_output - - -def test_node_leans_towards_line_graph(call_node): - query = "How often do users download files?" 
- # We ideally want to consider both total count of downloads per period, as well as how often a median user downloads - plan = """Events: - - downloaded_file - - math operation: total count - - downloaded_file - - math operation: median count per user - """ - actual_output = call_node(query, plan) - assert actual_output.trendsFilter.display == "ActionsLineGraph" - assert actual_output.series[0].kind == "EventsNode" - assert actual_output.series[0].event == "downloaded_file" - assert actual_output.series[0].math == "total" - assert actual_output.series[1].kind == "EventsNode" - assert actual_output.series[1].event == "downloaded_file" - assert actual_output.series[1].math == "median_count_per_actor" diff --git a/ee/hogai/eval/tests/test_eval_trends_planner.py b/ee/hogai/eval/tests/test_eval_trends_planner.py deleted file mode 100644 index 4d4ea4c41d..0000000000 --- a/ee/hogai/eval/tests/test_eval_trends_planner.py +++ /dev/null @@ -1,196 +0,0 @@ -from collections.abc import Callable - -import pytest -from deepeval import assert_test -from deepeval.metrics import GEval -from deepeval.test_case import LLMTestCase, LLMTestCaseParams -from langchain_core.runnables.config import RunnableConfig -from langgraph.graph.state import CompiledStateGraph - -from ee.hogai.assistant import AssistantGraph -from ee.hogai.utils.types import AssistantNodeName, AssistantState -from posthog.schema import HumanMessage - - -@pytest.fixture(scope="module") -def metric(): - return GEval( - name="Trends Plan Correctness", - criteria="You will be given expected and actual generated plans to provide a taxonomy to answer a user's question with a trends insight. Compare the plans to determine whether the taxonomy of the actual plan matches the expected plan. Do not apply general knowledge about trends insights.", - evaluation_steps=[ - "A plan must define at least one event and a math type, but it is not required to define any filters, breakdowns, or formulas.", - "Compare events, properties, math types, and property values of 'expected output' and 'actual output'. Do not penalize if the actual output does not include a timeframe.", - "Check if the combination of events, properties, and property values in 'actual output' can answer the user's question according to the 'expected output'.", - # The criteria for aggregations must be more specific because there isn't a way to bypass them. - "Check if the math types in 'actual output' match those in 'expected output'. Math types sometimes are interchangeable, so use your judgement. If the aggregation type is specified by a property, user, or group in 'expected output', the same property, user, or group must be used in 'actual output'.", - "If 'expected output' contains a breakdown, check if 'actual output' contains a similar breakdown, and heavily penalize if the breakdown is not present or different.", - "If 'expected output' contains a formula, check if 'actual output' contains a similar formula, and heavily penalize if the formula is not present or different.", - # We don't want to see in the output unnecessary property filters. The assistant tries to use them all the time. - "Heavily penalize if the 'actual output' contains any excessive output not present in the 'expected output'. 
For example, the `is set` operator in filters should not be used unless the user explicitly asks for it.", - ], - evaluation_params=[ - LLMTestCaseParams.INPUT, - LLMTestCaseParams.EXPECTED_OUTPUT, - LLMTestCaseParams.ACTUAL_OUTPUT, - ], - threshold=0.7, - ) - - -@pytest.fixture -def call_node(team, runnable_config: RunnableConfig) -> Callable[[str], str]: - graph: CompiledStateGraph = ( - AssistantGraph(team) - .add_edge(AssistantNodeName.START, AssistantNodeName.TRENDS_PLANNER) - .add_trends_planner(AssistantNodeName.END) - .compile() - ) - - def callable(query: str) -> str: - state = graph.invoke( - AssistantState(messages=[HumanMessage(content=query)]), - runnable_config, - ) - return AssistantState.model_validate(state).plan or "" - - return callable - - -def test_no_excessive_property_filters(metric, call_node): - query = "Show the $pageview trend" - test_case = LLMTestCase( - input=query, - expected_output=""" - Events: - - $pageview - - math operation: total count - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) - - -def test_no_excessive_property_filters_for_a_defined_math_type(metric, call_node): - query = "What is the MAU?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Events: - - $pageview - - math operation: unique users - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) - - -def test_basic_filtering(metric, call_node): - query = "can you compare how many Chrome vs Safari users uploaded a file in the last 30d?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Events: - - uploaded_file - - math operation: total count - - property filter 1: - - entity: event - - property name: $browser - - property type: String - - operator: equals - - property value: Chrome - - property filter 2: - - entity: event - - property name: $browser - - property type: String - - operator: equals - - property value: Safari - - Breakdown by: - - breakdown 1: - - entity: event - - property name: $browser - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) - - -def test_formula_mode(metric, call_node): - query = "i want to see a ratio of identify divided by page views" - test_case = LLMTestCase( - input=query, - expected_output=""" - Events: - - $identify - - math operation: total count - - $pageview - - math operation: total count - - Formula: - `A/B`, where `A` is the total count of `$identify` and `B` is the total count of `$pageview` - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) - - -def test_math_type_by_a_property(metric, call_node): - query = "what is the average session duration?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Events: - - All Events - - math operation: average by `$session_duration` - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) - - -def test_math_type_by_a_user(metric, call_node): - query = "What is the median page view count for a user?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Events: - - $pageview - - math operation: median by users - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) - - -def test_needle_in_a_haystack(metric, call_node): - query = "How frequently do people pay for a personal-pro plan?" 
- test_case = LLMTestCase( - input=query, - expected_output=""" - Events: - - paid_bill - - math operation: total count - - property filter 1: - - entity: event - - property name: plan - - property type: String - - operator: contains - - property value: personal/pro - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) - - -def test_trends_does_not_include_timeframe(metric, call_node): - query = "what is the pageview trend for event time before 2024-01-01?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Events: - - $pageview - - math operation: total count - """, - actual_output=call_node(query), - ) - assert_test(test_case, [metric]) diff --git a/ee/hogai/funnels/__init__.py b/ee/hogai/funnels/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/funnels/nodes.py b/ee/hogai/funnels/nodes.py deleted file mode 100644 index 6f71305e0b..0000000000 --- a/ee/hogai/funnels/nodes.py +++ /dev/null @@ -1,50 +0,0 @@ -from langchain_core.prompts import ChatPromptTemplate -from langchain_core.runnables import RunnableConfig - -from ee.hogai.funnels.prompts import FUNNEL_SYSTEM_PROMPT, REACT_SYSTEM_PROMPT -from ee.hogai.funnels.toolkit import FUNNEL_SCHEMA, FunnelsTaxonomyAgentToolkit -from ee.hogai.schema_generator.nodes import SchemaGeneratorNode, SchemaGeneratorToolsNode -from ee.hogai.schema_generator.utils import SchemaGeneratorOutput -from ee.hogai.taxonomy_agent.nodes import TaxonomyAgentPlannerNode, TaxonomyAgentPlannerToolsNode -from ee.hogai.utils.types import AssistantState, PartialAssistantState -from posthog.schema import AssistantFunnelsQuery - - -class FunnelPlannerNode(TaxonomyAgentPlannerNode): - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - toolkit = FunnelsTaxonomyAgentToolkit(self._team) - prompt = ChatPromptTemplate.from_messages( - [ - ("system", REACT_SYSTEM_PROMPT), - ], - template_format="mustache", - ) - return super()._run_with_prompt_and_toolkit(state, prompt, toolkit, config=config) - - -class FunnelPlannerToolsNode(TaxonomyAgentPlannerToolsNode): - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - toolkit = FunnelsTaxonomyAgentToolkit(self._team) - return super()._run_with_toolkit(state, toolkit, config=config) - - -FunnelsSchemaGeneratorOutput = SchemaGeneratorOutput[AssistantFunnelsQuery] - - -class FunnelGeneratorNode(SchemaGeneratorNode[AssistantFunnelsQuery]): - INSIGHT_NAME = "Funnels" - OUTPUT_MODEL = FunnelsSchemaGeneratorOutput - OUTPUT_SCHEMA = FUNNEL_SCHEMA - - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - prompt = ChatPromptTemplate.from_messages( - [ - ("system", FUNNEL_SYSTEM_PROMPT), - ], - template_format="mustache", - ) - return super()._run_with_prompt(state, prompt, config=config) - - -class FunnelGeneratorToolsNode(SchemaGeneratorToolsNode): - pass diff --git a/ee/hogai/funnels/prompts.py b/ee/hogai/funnels/prompts.py deleted file mode 100644 index e70d5105d2..0000000000 --- a/ee/hogai/funnels/prompts.py +++ /dev/null @@ -1,155 +0,0 @@ -REACT_SYSTEM_PROMPT = """ -<agent_info> -You are an expert product analyst agent specializing in data visualization and funnel analysis. Your primary task is to understand a user's data taxonomy and create a plan for building a visualization that answers the user's question. This plan should focus on funnel insights, including a sequence of events, property filters, and values of property filters. 
- -{{core_memory_instructions}} - -{{react_format}} -</agent_info> - -<core_memory> -{{core_memory}} -</core_memory> - -{{react_human_in_the_loop}} - -Below you will find information on how to correctly discover the taxonomy of the user's data. - -<general_knowledge> -Funnel insights enable users to understand how users move through their product. It is usually a sequence of events that users go through: some of them continue to the next step, some of them drop off. Funnels are perfect for finding conversion rates. -</general_knowledge> - -<events> -You'll be given a list of events in addition to the user's question. Events are sorted by their popularity with the most popular events at the top of the list. Prioritize popular events. You must always specify events to use. Events always have an associated user's profile. Assess whether the sequence of events suffices to answer the question before applying property filters or a breakdown. You must define at least two series. Funnel insights do not require breakdowns or filters by default. -</events> - -{{react_property_filters}} - -<exclusion_steps> -Users may want to use exclusion events to filter out conversions in which a particular event occurred between specific steps. These events must not be included in the main sequence. You must include start and end indexes for each exclusion where the minimum index is zero and the maximum index is the number of steps minus one in the funnel. - -For example, there is a sequence with three steps: sign up, finish onboarding, purchase. If the user wants to exclude all conversions in which users have not navigated away before finishing the onboarding, the exclusion step will be: - -``` -Exclusions: -- $pageleave - - start index: 0 - - end index: 1 -``` -</exclusion_steps> - -<breakdown> -A breakdown is used to segment data by a single property value. They divide all defined funnel series into multiple subseries based on the values of the property. Include a breakdown **only when it is essential to directly answer the user's question**. You must not add a breakdown if the question can be addressed without additional segmentation. - -When using breakdowns, you must: -- **Identify the property group** and name for a breakdown. -- **Provide the property name** for a breakdown. -- **Validate that the property value accurately reflects the intended criteria**. - -Examples of using a breakdown: -- page views to sign up funnel by country: you need to find a property such as `$geoip_country_code` and set it as a breakdown. -- conversion rate of users who have completed onboarding after signing up by an organization: you need to find a property such as `organization name` and set it as a breakdown. -</breakdown> - -<reminders> -- Ensure that any properties and a breakdown included are directly relevant to the context and objectives of the user's question. Avoid unnecessary or unrelated details. -- Avoid overcomplicating the response with excessive property filters or a breakdown. Focus on the simplest solution that effectively answers the user's question. -</reminders> ---- - -{{react_format_reminder}} -""" - -FUNNEL_SYSTEM_PROMPT = """ -Act as an expert product manager. Your task is to generate a JSON schema of funnel insights. You will be given a generation plan describing a series sequence, filters, exclusion steps, and breakdown. Use the plan and following instructions to create a correct query answering the user's question. - -Below is the additional context. 
- -Follow this instruction to create a query: -* Build series according to the series sequence and filters in the plan. Properties can be of multiple types: String, Numeric, Bool, and DateTime. A property can be an array of those types and only has a single type. -* Apply the exclusion steps and breakdown according to the plan. -* When evaluating filter operators, replace the `equals` or `doesn't equal` operators with `contains` or `doesn't contain` if the query value is likely a personal name, company name, or any other name-sensitive term where letter casing matters. For instance, if the value is 'John Doe' or 'Acme Corp', replace `equals` with `contains` and change the value to lowercase from `John Doe` to `john doe` or `Acme Corp` to `acme corp`. -* Determine the funnel order type, aggregation type, and visualization type that will answer the user's question in the best way. Use the provided defaults. -* Determine the window interval and unit. Use the provided defaults. -* Choose the date range and the interval the user wants to analyze. -* Determine if the user wants to name the series or use the default names. -* Determine if the user wants to filter out internal and test users. If the user didn't specify, filter out internal and test users by default. -* Determine if you need to apply a sampling factor, different layout, bin count, etc. Only specify those if the user has explicitly asked. -* Use your judgment if there are any other parameters that the user might want to adjust that aren't listed here. - -The user might want to receive insights about groups. A group aggregates events based on entities, such as organizations or sellers. The user might provide a list of group names and their numeric indexes. Instead of a group's name, always use its numeric index. - -The funnel can be aggregated by: -- Unique users (default, do not specify anything to use it). Use this option unless the user states otherwise. -- Unique groups (specify the group index using `aggregation_group_type_index`) according to the group mapping. -- Unique sessions (specify the constant for `funnelAggregateByHogQL`). - -## Schema Examples - -### Question: How does a conversion from a first recorded event to an insight saved change for orgs? - -Plan: -``` -Sequence: -1. first team event ingested -2. insight saved -``` - -Output: -``` -{"aggregation_group_type_index":0,"dateRange":{"date_from":"-6m"},"filterTestAccounts":true,"funnelsFilter":{"breakdownAttributionType":"first_touch","funnelOrderType":"ordered","funnelVizType":"trends","funnelWindowInterval":14,"funnelWindowIntervalUnit":"day"},"interval":"month","kind":"FunnelsQuery","series":[{"event":"first team event ingested","kind":"EventsNode"},{"event":"insight saved","kind":"EventsNode"}]} -``` - -### Question: What percentage of users have clicked the CTA on the signup page within one hour on different platforms in the last six months without leaving the page? - -Plan: -``` -Sequence: -1. $pageview - - $current_url - - operator: contains - - value: signup -2. 
click subscribe button - - $current_url - - operator: contains - - value: signup - -Exclusions: -- $pageleave - - start index: 1 - - end index: 2 - -Breakdown: -- event -- $os -``` - -Output: -``` -{"kind":"FunnelsQuery","series":[{"kind":"EventsNode","event":"$pageview","properties":[{"key":"$current_url","type":"event","value":"signup","operator":"icontains"}]},{"kind":"EventsNode","event":"click subscribe button","properties":[{"key":"$current_url","type":"event","value":"signup","operator":"icontains"}]}],"interval":"week","dateRange":{"date_from":"-180d"},"funnelsFilter":{"funnelWindowInterval":1,"funnelWindowIntervalUnit":"hour","funnelOrderType":"ordered","exclusions":[{"kind":"EventsNode","event":"$pageleave","funnelFromStep":0,"funnelToStep":1}]},"filterTestAccounts":true,"breakdownFilter":{"breakdown_type":"event","breakdown":"$os"}} -``` - -### Question: rate of credit card purchases from viewing the product without any events in between - -Plan: -``` -Sequence: -1. view product -2. purchase - - paymentMethod - - operator: exact - - value: credit_card -``` - -Output: -``` -{"dateRange":{"date_from":"-30d"},"filterTestAccounts":true,"funnelsFilter":{"funnelOrderType":"strict","funnelWindowInterval":14,"funnelWindowIntervalUnit":"day"},"interval":"month","kind":"FunnelsQuery","series":[{"event":"view product","kind":"EventsNode"},{"event":"purchase","kind":"EventsNode","properties":[{"key":"paymentMethod","type":"event","value":"credit_card","operator":"exact"}]}]} -``` - -Obey these rules: -- If the date range is not specified, use the best judgment to select a reasonable date range. By default, use the last 30 days. -- Filter internal users by default if the user doesn't specify. -- You can't create new events or property definitions. Stick to the plan. - -Remember, your efforts will be rewarded by the company's founders. Do not hallucinate. 
-""" diff --git a/ee/hogai/funnels/test/__init__.py b/ee/hogai/funnels/test/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/funnels/test/test_nodes.py b/ee/hogai/funnels/test/test_nodes.py deleted file mode 100644 index 91b53d13cb..0000000000 --- a/ee/hogai/funnels/test/test_nodes.py +++ /dev/null @@ -1,39 +0,0 @@ -from unittest.mock import patch - -from django.test import override_settings -from langchain_core.runnables import RunnableLambda - -from ee.hogai.funnels.nodes import FunnelGeneratorNode, FunnelsSchemaGeneratorOutput -from ee.hogai.utils.types import AssistantState, PartialAssistantState -from posthog.schema import ( - AssistantFunnelsQuery, - HumanMessage, - VisualizationMessage, -) -from posthog.test.base import APIBaseTest, ClickhouseTestMixin - - -@override_settings(IN_UNIT_TESTING=True) -class TestFunnelsGeneratorNode(ClickhouseTestMixin, APIBaseTest): - def setUp(self): - super().setUp() - self.schema = AssistantFunnelsQuery(series=[]) - - def test_node_runs(self): - node = FunnelGeneratorNode(self.team) - with patch.object(FunnelGeneratorNode, "_model") as generator_model_mock: - generator_model_mock.return_value = RunnableLambda( - lambda _: FunnelsSchemaGeneratorOutput(query=self.schema).model_dump() - ) - new_state = node.run( - AssistantState(messages=[HumanMessage(content="Text")], plan="Plan"), - {}, - ) - self.assertEqual( - new_state, - PartialAssistantState( - messages=[VisualizationMessage(answer=self.schema, plan="Plan", id=new_state.messages[0].id)], - intermediate_steps=[], - plan="", - ), - ) diff --git a/ee/hogai/funnels/toolkit.py b/ee/hogai/funnels/toolkit.py deleted file mode 100644 index ae603519cc..0000000000 --- a/ee/hogai/funnels/toolkit.py +++ /dev/null @@ -1,73 +0,0 @@ -from ee.hogai.taxonomy_agent.toolkit import TaxonomyAgentToolkit, ToolkitTool -from ee.hogai.utils.helpers import dereference_schema -from posthog.schema import AssistantFunnelsQuery - - -class FunnelsTaxonomyAgentToolkit(TaxonomyAgentToolkit): - def _get_tools(self) -> list[ToolkitTool]: - return [ - *self._default_tools, - { - "name": "final_answer", - "signature": "(final_response: str)", - "description": """ - Use this tool to provide the final answer to the user's question. - - Answer in the following format: - ``` - Sequence: - 1. event 1 - - property filter 1: - - entity - - property name - - property type - - operator - - property value - - property filter 2... Repeat for each property filter. - 2. event 2 - - property filter 1: - - entity - - property name - - property type - - operator - - property value - - property filter 2... Repeat for each property filter. - 3. Repeat for each event... - - (if exclusion steps are used) - Exclusions: - - exclusion 1 - - start index: 1 - - end index: 2 - - exclusion 2... Repeat for each exclusion... - - (if a breakdown is used) - Breakdown by: - - entity - - property name - ``` - - Args: - final_response: List all events and properties that you want to use to answer the question. 
- """, - }, - ] - - -def generate_funnel_schema() -> dict: - schema = AssistantFunnelsQuery.model_json_schema() - return { - "name": "output_insight_schema", - "description": "Outputs the JSON schema of a product analytics insight", - "parameters": { - "type": "object", - "properties": { - "query": dereference_schema(schema), - }, - "additionalProperties": False, - "required": ["query"], - }, - } - - -FUNNEL_SCHEMA = generate_funnel_schema() diff --git a/ee/hogai/graph.py b/ee/hogai/graph.py deleted file mode 100644 index abab3c4b1b..0000000000 --- a/ee/hogai/graph.py +++ /dev/null @@ -1,302 +0,0 @@ -from collections.abc import Hashable -from typing import Optional, cast - -from langchain_core.runnables.base import RunnableLike -from langgraph.graph.state import StateGraph - -from ee.hogai.django_checkpoint.checkpointer import DjangoCheckpointer -from ee.hogai.funnels.nodes import ( - FunnelGeneratorNode, - FunnelGeneratorToolsNode, - FunnelPlannerNode, - FunnelPlannerToolsNode, -) -from ee.hogai.memory.nodes import ( - MemoryCollectorNode, - MemoryCollectorToolsNode, - MemoryInitializerInterruptNode, - MemoryInitializerNode, - MemoryOnboardingNode, -) -from ee.hogai.retention.nodes import ( - RetentionGeneratorNode, - RetentionGeneratorToolsNode, - RetentionPlannerNode, - RetentionPlannerToolsNode, -) -from ee.hogai.router.nodes import RouterNode -from ee.hogai.summarizer.nodes import SummarizerNode -from ee.hogai.trends.nodes import ( - TrendsGeneratorNode, - TrendsGeneratorToolsNode, - TrendsPlannerNode, - TrendsPlannerToolsNode, -) -from ee.hogai.utils.types import AssistantNodeName, AssistantState -from posthog.models.team.team import Team - -checkpointer = DjangoCheckpointer() - - -class AssistantGraph: - _team: Team - _graph: StateGraph - - def __init__(self, team: Team): - self._team = team - self._graph = StateGraph(AssistantState) - self._has_start_node = False - - def add_edge(self, from_node: AssistantNodeName, to_node: AssistantNodeName): - if from_node == AssistantNodeName.START: - self._has_start_node = True - self._graph.add_edge(from_node, to_node) - return self - - def add_node(self, node: AssistantNodeName, action: RunnableLike): - self._graph.add_node(node, action) - return self - - def compile(self): - if not self._has_start_node: - raise ValueError("Start node not added to the graph") - return self._graph.compile(checkpointer=checkpointer) - - def add_router( - self, - path_map: Optional[dict[Hashable, AssistantNodeName]] = None, - ): - builder = self._graph - path_map = path_map or { - "trends": AssistantNodeName.TRENDS_PLANNER, - "funnel": AssistantNodeName.FUNNEL_PLANNER, - "retention": AssistantNodeName.RETENTION_PLANNER, - } - router_node = RouterNode(self._team) - builder.add_node(AssistantNodeName.ROUTER, router_node.run) - builder.add_conditional_edges( - AssistantNodeName.ROUTER, - router_node.router, - path_map=cast(dict[Hashable, str], path_map), - ) - return self - - def add_trends_planner(self, next_node: AssistantNodeName = AssistantNodeName.TRENDS_GENERATOR): - builder = self._graph - - create_trends_plan_node = TrendsPlannerNode(self._team) - builder.add_node(AssistantNodeName.TRENDS_PLANNER, create_trends_plan_node.run) - builder.add_conditional_edges( - AssistantNodeName.TRENDS_PLANNER, - create_trends_plan_node.router, - path_map={ - "tools": AssistantNodeName.TRENDS_PLANNER_TOOLS, - }, - ) - - create_trends_plan_tools_node = TrendsPlannerToolsNode(self._team) - builder.add_node(AssistantNodeName.TRENDS_PLANNER_TOOLS, 
create_trends_plan_tools_node.run) - builder.add_conditional_edges( - AssistantNodeName.TRENDS_PLANNER_TOOLS, - create_trends_plan_tools_node.router, - path_map={ - "continue": AssistantNodeName.TRENDS_PLANNER, - "plan_found": next_node, - }, - ) - - return self - - def add_trends_generator(self, next_node: AssistantNodeName = AssistantNodeName.SUMMARIZER): - builder = self._graph - - trends_generator = TrendsGeneratorNode(self._team) - builder.add_node(AssistantNodeName.TRENDS_GENERATOR, trends_generator.run) - - trends_generator_tools = TrendsGeneratorToolsNode(self._team) - builder.add_node(AssistantNodeName.TRENDS_GENERATOR_TOOLS, trends_generator_tools.run) - - builder.add_edge(AssistantNodeName.TRENDS_GENERATOR_TOOLS, AssistantNodeName.TRENDS_GENERATOR) - builder.add_conditional_edges( - AssistantNodeName.TRENDS_GENERATOR, - trends_generator.router, - path_map={ - "tools": AssistantNodeName.TRENDS_GENERATOR_TOOLS, - "next": next_node, - }, - ) - - return self - - def add_funnel_planner(self, next_node: AssistantNodeName = AssistantNodeName.FUNNEL_GENERATOR): - builder = self._graph - - funnel_planner = FunnelPlannerNode(self._team) - builder.add_node(AssistantNodeName.FUNNEL_PLANNER, funnel_planner.run) - builder.add_conditional_edges( - AssistantNodeName.FUNNEL_PLANNER, - funnel_planner.router, - path_map={ - "tools": AssistantNodeName.FUNNEL_PLANNER_TOOLS, - }, - ) - - funnel_planner_tools = FunnelPlannerToolsNode(self._team) - builder.add_node(AssistantNodeName.FUNNEL_PLANNER_TOOLS, funnel_planner_tools.run) - builder.add_conditional_edges( - AssistantNodeName.FUNNEL_PLANNER_TOOLS, - funnel_planner_tools.router, - path_map={ - "continue": AssistantNodeName.FUNNEL_PLANNER, - "plan_found": next_node, - }, - ) - - return self - - def add_funnel_generator(self, next_node: AssistantNodeName = AssistantNodeName.SUMMARIZER): - builder = self._graph - - funnel_generator = FunnelGeneratorNode(self._team) - builder.add_node(AssistantNodeName.FUNNEL_GENERATOR, funnel_generator.run) - - funnel_generator_tools = FunnelGeneratorToolsNode(self._team) - builder.add_node(AssistantNodeName.FUNNEL_GENERATOR_TOOLS, funnel_generator_tools.run) - - builder.add_edge(AssistantNodeName.FUNNEL_GENERATOR_TOOLS, AssistantNodeName.FUNNEL_GENERATOR) - builder.add_conditional_edges( - AssistantNodeName.FUNNEL_GENERATOR, - funnel_generator.router, - path_map={ - "tools": AssistantNodeName.FUNNEL_GENERATOR_TOOLS, - "next": next_node, - }, - ) - - return self - - def add_retention_planner(self, next_node: AssistantNodeName = AssistantNodeName.RETENTION_GENERATOR): - builder = self._graph - - retention_planner = RetentionPlannerNode(self._team) - builder.add_node(AssistantNodeName.RETENTION_PLANNER, retention_planner.run) - builder.add_conditional_edges( - AssistantNodeName.RETENTION_PLANNER, - retention_planner.router, - path_map={ - "tools": AssistantNodeName.RETENTION_PLANNER_TOOLS, - }, - ) - - retention_planner_tools = RetentionPlannerToolsNode(self._team) - builder.add_node(AssistantNodeName.RETENTION_PLANNER_TOOLS, retention_planner_tools.run) - builder.add_conditional_edges( - AssistantNodeName.RETENTION_PLANNER_TOOLS, - retention_planner_tools.router, - path_map={ - "continue": AssistantNodeName.RETENTION_PLANNER, - "plan_found": next_node, - }, - ) - - return self - - def add_retention_generator(self, next_node: AssistantNodeName = AssistantNodeName.SUMMARIZER): - builder = self._graph - - retention_generator = RetentionGeneratorNode(self._team) - builder.add_node(AssistantNodeName.RETENTION_GENERATOR, 
retention_generator.run) - - retention_generator_tools = RetentionGeneratorToolsNode(self._team) - builder.add_node(AssistantNodeName.RETENTION_GENERATOR_TOOLS, retention_generator_tools.run) - - builder.add_edge(AssistantNodeName.RETENTION_GENERATOR_TOOLS, AssistantNodeName.RETENTION_GENERATOR) - builder.add_conditional_edges( - AssistantNodeName.RETENTION_GENERATOR, - retention_generator.router, - path_map={ - "tools": AssistantNodeName.RETENTION_GENERATOR_TOOLS, - "next": next_node, - }, - ) - - return self - - def add_summarizer(self, next_node: AssistantNodeName = AssistantNodeName.END): - builder = self._graph - summarizer_node = SummarizerNode(self._team) - builder.add_node(AssistantNodeName.SUMMARIZER, summarizer_node.run) - builder.add_edge(AssistantNodeName.SUMMARIZER, next_node) - return self - - def add_memory_initializer(self, next_node: AssistantNodeName = AssistantNodeName.ROUTER): - builder = self._graph - self._has_start_node = True - - memory_onboarding = MemoryOnboardingNode(self._team) - memory_initializer = MemoryInitializerNode(self._team) - memory_initializer_interrupt = MemoryInitializerInterruptNode(self._team) - - builder.add_node(AssistantNodeName.MEMORY_ONBOARDING, memory_onboarding.run) - builder.add_node(AssistantNodeName.MEMORY_INITIALIZER, memory_initializer.run) - builder.add_node(AssistantNodeName.MEMORY_INITIALIZER_INTERRUPT, memory_initializer_interrupt.run) - - builder.add_conditional_edges( - AssistantNodeName.START, - memory_onboarding.should_run, - path_map={True: AssistantNodeName.MEMORY_ONBOARDING, False: next_node}, - ) - builder.add_conditional_edges( - AssistantNodeName.MEMORY_ONBOARDING, - memory_onboarding.router, - path_map={"continue": next_node, "initialize_memory": AssistantNodeName.MEMORY_INITIALIZER}, - ) - builder.add_conditional_edges( - AssistantNodeName.MEMORY_INITIALIZER, - memory_initializer.router, - path_map={"continue": next_node, "interrupt": AssistantNodeName.MEMORY_INITIALIZER_INTERRUPT}, - ) - builder.add_edge(AssistantNodeName.MEMORY_INITIALIZER_INTERRUPT, next_node) - - return self - - def add_memory_collector( - self, - next_node: AssistantNodeName = AssistantNodeName.END, - tools_node: AssistantNodeName = AssistantNodeName.MEMORY_COLLECTOR_TOOLS, - ): - builder = self._graph - self._has_start_node = True - - memory_collector = MemoryCollectorNode(self._team) - builder.add_edge(AssistantNodeName.START, AssistantNodeName.MEMORY_COLLECTOR) - builder.add_node(AssistantNodeName.MEMORY_COLLECTOR, memory_collector.run) - builder.add_conditional_edges( - AssistantNodeName.MEMORY_COLLECTOR, - memory_collector.router, - path_map={"tools": tools_node, "next": next_node}, - ) - return self - - def add_memory_collector_tools(self): - builder = self._graph - memory_collector_tools = MemoryCollectorToolsNode(self._team) - builder.add_node(AssistantNodeName.MEMORY_COLLECTOR_TOOLS, memory_collector_tools.run) - builder.add_edge(AssistantNodeName.MEMORY_COLLECTOR_TOOLS, AssistantNodeName.MEMORY_COLLECTOR) - return self - - def compile_full_graph(self): - return ( - self.add_memory_initializer() - .add_memory_collector() - .add_memory_collector_tools() - .add_router() - .add_trends_planner() - .add_trends_generator() - .add_funnel_planner() - .add_funnel_generator() - .add_retention_planner() - .add_retention_generator() - .add_summarizer() - .compile() - ) diff --git a/ee/hogai/memory/__init__.py b/ee/hogai/memory/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/memory/nodes.py 
b/ee/hogai/memory/nodes.py deleted file mode 100644 index e070c60baf..0000000000 --- a/ee/hogai/memory/nodes.py +++ /dev/null @@ -1,377 +0,0 @@ -import re -from typing import Literal, Optional, Union, cast -from uuid import uuid4 - -from django.utils import timezone -from langchain_community.chat_models import ChatPerplexity -from langchain_core.messages import ( - AIMessage as LangchainAIMessage, - AIMessageChunk, - BaseMessage, - HumanMessage as LangchainHumanMessage, - ToolMessage as LangchainToolMessage, -) -from langchain_core.output_parsers import PydanticToolsParser, StrOutputParser -from langchain_core.prompts import ChatPromptTemplate -from langchain_core.runnables import RunnableConfig -from langchain_openai import ChatOpenAI -from langgraph.errors import NodeInterrupt -from pydantic import BaseModel, Field, ValidationError - -from ee.hogai.memory.parsers import MemoryCollectionCompleted, compressed_memory_parser, raise_memory_updated -from ee.hogai.memory.prompts import ( - COMPRESSION_PROMPT, - FAILED_SCRAPING_MESSAGE, - INITIALIZE_CORE_MEMORY_WITH_BUNDLE_IDS_PROMPT, - INITIALIZE_CORE_MEMORY_WITH_BUNDLE_IDS_USER_PROMPT, - INITIALIZE_CORE_MEMORY_WITH_URL_PROMPT, - INITIALIZE_CORE_MEMORY_WITH_URL_USER_PROMPT, - MEMORY_COLLECTOR_PROMPT, - SCRAPING_CONFIRMATION_MESSAGE, - SCRAPING_INITIAL_MESSAGE, - SCRAPING_MEMORY_SAVED_MESSAGE, - SCRAPING_REJECTION_MESSAGE, - SCRAPING_TERMINATION_MESSAGE, - SCRAPING_VERIFICATION_MESSAGE, - TOOL_CALL_ERROR_PROMPT, -) -from ee.hogai.utils.helpers import filter_messages, find_last_message_of_type -from ee.hogai.utils.markdown import remove_markdown -from ee.hogai.utils.nodes import AssistantNode -from ee.hogai.utils.types import AssistantState, PartialAssistantState -from ee.models.assistant import CoreMemory -from posthog.hogql_queries.ai.event_taxonomy_query_runner import EventTaxonomyQueryRunner -from posthog.hogql_queries.query_runner import ExecutionMode -from posthog.models import Team -from posthog.schema import ( - AssistantForm, - AssistantFormOption, - AssistantMessage, - AssistantMessageMetadata, - CachedEventTaxonomyQueryResponse, - EventTaxonomyQuery, - HumanMessage, -) - - -class MemoryInitializerContextMixin: - _team: Team - - def _retrieve_context(self): - # Retrieve the origin domain. - runner = EventTaxonomyQueryRunner( - team=self._team, query=EventTaxonomyQuery(event="$pageview", properties=["$host"]) - ) - response = runner.run(ExecutionMode.RECENT_CACHE_CALCULATE_ASYNC_IF_STALE_AND_BLOCKING_ON_MISS) - if not isinstance(response, CachedEventTaxonomyQueryResponse): - raise ValueError("Failed to query the event taxonomy.") - # Otherwise, retrieve the app bundle ID. - if not response.results: - runner = EventTaxonomyQueryRunner( - team=self._team, query=EventTaxonomyQuery(event="$screen", properties=["$app_namespace"]) - ) - response = runner.run(ExecutionMode.RECENT_CACHE_CALCULATE_ASYNC_IF_STALE_AND_BLOCKING_ON_MISS) - if not isinstance(response, CachedEventTaxonomyQueryResponse): - raise ValueError("Failed to query the event taxonomy.") - return response.results - - -class MemoryOnboardingNode(MemoryInitializerContextMixin, AssistantNode): - def run(self, state: AssistantState, config: RunnableConfig) -> Optional[PartialAssistantState]: - core_memory, _ = CoreMemory.objects.get_or_create(team=self._team) - - # The team has a product description, initialize the memory with it. 
- if self._team.project.product_description: - core_memory.set_core_memory(self._team.project.product_description) - return None - - retrieved_properties = self._retrieve_context() - - # No host or app bundle ID found, terminate the onboarding. - if not retrieved_properties or retrieved_properties[0].sample_count == 0: - core_memory.change_status_to_skipped() - return None - - core_memory.change_status_to_pending() - return PartialAssistantState( - messages=[ - AssistantMessage( - content=SCRAPING_INITIAL_MESSAGE, - id=str(uuid4()), - ) - ] - ) - - def should_run(self, _: AssistantState) -> bool: - """ - If another user has already started the onboarding process, or it has already been completed, do not trigger it again. - """ - core_memory = self.core_memory - return not core_memory or (not core_memory.is_scraping_pending and not core_memory.is_scraping_finished) - - def router(self, state: AssistantState) -> Literal["initialize_memory", "continue"]: - last_message = state.messages[-1] - if isinstance(last_message, HumanMessage): - return "continue" - return "initialize_memory" - - -class MemoryInitializerNode(MemoryInitializerContextMixin, AssistantNode): - """ - Scrapes the product description from the given origin or app bundle IDs with Perplexity. - """ - - _team: Team - - def __init__(self, team: Team): - self._team = team - - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - core_memory, _ = CoreMemory.objects.get_or_create(team=self._team) - retrieved_properties = self._retrieve_context() - - # No host or app bundle ID found, continue. - if not retrieved_properties or retrieved_properties[0].sample_count == 0: - raise ValueError("No host or app bundle ID found in the memory initializer.") - - retrieved_prop = retrieved_properties[0] - if retrieved_prop.property == "$host": - prompt = ChatPromptTemplate.from_messages( - [ - ("system", INITIALIZE_CORE_MEMORY_WITH_URL_PROMPT), - ("human", INITIALIZE_CORE_MEMORY_WITH_URL_USER_PROMPT), - ], - template_format="mustache", - ).partial(url=retrieved_prop.sample_values[0]) - else: - prompt = ChatPromptTemplate.from_messages( - [ - ("system", INITIALIZE_CORE_MEMORY_WITH_BUNDLE_IDS_PROMPT), - ("human", INITIALIZE_CORE_MEMORY_WITH_BUNDLE_IDS_USER_PROMPT), - ], - template_format="mustache", - ).partial(bundle_ids=retrieved_prop.sample_values) - - chain = prompt | self._model() | StrOutputParser() - answer = chain.invoke({}, config=config) - - # Perplexity has failed to scrape the data, continue. - if "no data available." in answer.lower(): - core_memory.change_status_to_skipped() - return PartialAssistantState(messages=[AssistantMessage(content=FAILED_SCRAPING_MESSAGE, id=str(uuid4()))]) - - # Otherwise, proceed to confirmation that the memory is correct. 
- return PartialAssistantState(messages=[AssistantMessage(content=self.format_message(answer), id=str(uuid4()))]) - - def router(self, state: AssistantState) -> Literal["interrupt", "continue"]: - last_message = state.messages[-1] - if isinstance(last_message, AssistantMessage) and last_message.content == FAILED_SCRAPING_MESSAGE: - return "continue" - return "interrupt" - - @classmethod - def should_process_message_chunk(cls, message: AIMessageChunk) -> bool: - placeholder = "no data available" - content = cast(str, message.content) - return placeholder not in content.lower() and len(content) > len(placeholder) - - @classmethod - def format_message(cls, message: str) -> str: - return re.sub(r"\[\d+\]", "", message) - - def _model(self): - return ChatPerplexity(model="llama-3.1-sonar-large-128k-online", temperature=0, streaming=True) - - -class MemoryInitializerInterruptNode(AssistantNode): - """ - Prompts the user to confirm or reject the scraped memory. Since Perplexity doesn't guarantee the quality of the scraped data, we need to verify it with the user. - """ - - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - last_message = state.messages[-1] - if not state.resumed: - raise NodeInterrupt( - AssistantMessage( - content=SCRAPING_VERIFICATION_MESSAGE, - meta=AssistantMessageMetadata( - form=AssistantForm( - options=[ - AssistantFormOption(value=SCRAPING_CONFIRMATION_MESSAGE, variant="primary"), - AssistantFormOption(value=SCRAPING_REJECTION_MESSAGE), - ] - ) - ), - id=str(uuid4()), - ) - ) - if not isinstance(last_message, HumanMessage): - raise ValueError("Last message is not a human message.") - - core_memory = self.core_memory - if not core_memory: - raise ValueError("No core memory found.") - - try: - # If the user rejects the scraped memory, terminate the onboarding. - if last_message.content != SCRAPING_CONFIRMATION_MESSAGE: - core_memory.change_status_to_skipped() - return PartialAssistantState( - messages=[ - AssistantMessage( - content=SCRAPING_TERMINATION_MESSAGE, - id=str(uuid4()), - ) - ] - ) - - assistant_message = find_last_message_of_type(state.messages, AssistantMessage) - - if not assistant_message: - raise ValueError("No memory message found.") - - # Compress the memory before saving it. The Perplexity's text is very verbose. It just complicates things for the memory collector. - prompt = ChatPromptTemplate.from_messages( - [ - ("system", COMPRESSION_PROMPT), - ("human", self._format_memory(assistant_message.content)), - ] - ) - chain = prompt | self._model | StrOutputParser() | compressed_memory_parser - compressed_memory = cast(str, chain.invoke({}, config=config)) - core_memory.set_core_memory(compressed_memory) - except: - core_memory.change_status_to_skipped() # Ensure we don't leave the memory in a permanent pending state - raise - - return PartialAssistantState( - messages=[ - AssistantMessage( - content=SCRAPING_MEMORY_SAVED_MESSAGE, - id=str(uuid4()), - ) - ] - ) - - @property - def _model(self): - return ChatOpenAI(model="gpt-4o-mini", temperature=0, disable_streaming=True) - - def _format_memory(self, memory: str) -> str: - """ - Remove markdown and source reference tags like [1], [2], etc. - """ - return remove_markdown(memory) - - -# Lower casing matters here. Do not change it. -class core_memory_append(BaseModel): - """ - Appends a new memory fragment to persistent storage. - """ - - memory_content: str = Field(description="The content of a new memory to be added to storage.") - - -# Lower casing matters here. 
Do not change it. -class core_memory_replace(BaseModel): - """ - Replaces a specific fragment of memory (word, sentence, paragraph, etc.) with another in persistent storage. - """ - - original_fragment: str = Field(description="The content of the memory to be replaced.") - new_fragment: str = Field(description="The content to replace the existing memory with.") - - -memory_collector_tools = [core_memory_append, core_memory_replace] - - -class MemoryCollectorNode(AssistantNode): - """ - The Memory Collector manages the core memory of the agent. Core memory is a text containing facts about a user's company and product. It helps the agent save and remember facts that could be useful for insight generation or other agentic functions requiring deeper context about the product. - """ - - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - node_messages = state.memory_collection_messages or [] - - prompt = ChatPromptTemplate.from_messages( - [("system", MEMORY_COLLECTOR_PROMPT)], template_format="mustache" - ) + self._construct_messages(state) - chain = prompt | self._model | raise_memory_updated - - try: - response = chain.invoke( - { - "core_memory": self.core_memory_text, - "date": timezone.now().strftime("%Y-%m-%d"), - }, - config=config, - ) - except MemoryCollectionCompleted: - return PartialAssistantState(memory_updated=len(node_messages) > 0, memory_collection_messages=[]) - return PartialAssistantState(memory_collection_messages=[*node_messages, cast(LangchainAIMessage, response)]) - - def router(self, state: AssistantState) -> Literal["tools", "next"]: - if not state.memory_collection_messages: - return "next" - return "tools" - - @property - def _model(self): - return ChatOpenAI(model="gpt-4o", temperature=0, disable_streaming=True).bind_tools(memory_collector_tools) - - def _construct_messages(self, state: AssistantState) -> list[BaseMessage]: - node_messages = state.memory_collection_messages or [] - - filtered_messages = filter_messages(state.messages, entity_filter=(HumanMessage, AssistantMessage)) - conversation: list[BaseMessage] = [] - - for message in filtered_messages: - if isinstance(message, HumanMessage): - conversation.append(LangchainHumanMessage(content=message.content, id=message.id)) - elif isinstance(message, AssistantMessage): - conversation.append(LangchainAIMessage(content=message.content, id=message.id)) - - return [*conversation, *node_messages] - - -class MemoryCollectorToolsNode(AssistantNode): - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - node_messages = state.memory_collection_messages - if not node_messages: - raise ValueError("No memory collection messages found.") - last_message = node_messages[-1] - if not isinstance(last_message, LangchainAIMessage): - raise ValueError("Last message must be an AI message.") - core_memory = self.core_memory - if not core_memory: - raise ValueError("No core memory found.") - - tools_parser = PydanticToolsParser(tools=memory_collector_tools) - try: - tool_calls: list[Union[core_memory_append, core_memory_replace]] = tools_parser.invoke( - last_message, config=config - ) - except ValidationError as e: - failover_messages = ChatPromptTemplate.from_messages( - [("user", TOOL_CALL_ERROR_PROMPT)], template_format="mustache" - ).format_messages(validation_error_message=e.errors(include_url=False)) - return PartialAssistantState( - memory_collection_messages=[*node_messages, *failover_messages], - ) - - new_messages: list[LangchainToolMessage] = [] - 
for tool_call, schema in zip(last_message.tool_calls, tool_calls): - if isinstance(schema, core_memory_append): - core_memory.append_core_memory(schema.memory_content) - new_messages.append(LangchainToolMessage(content="Memory appended.", tool_call_id=tool_call["id"])) - if isinstance(schema, core_memory_replace): - try: - core_memory.replace_core_memory(schema.original_fragment, schema.new_fragment) - new_messages.append(LangchainToolMessage(content="Memory replaced.", tool_call_id=tool_call["id"])) - except ValueError as e: - new_messages.append(LangchainToolMessage(content=str(e), tool_call_id=tool_call["id"])) - - return PartialAssistantState( - memory_collection_messages=[*node_messages, *new_messages], - ) diff --git a/ee/hogai/memory/parsers.py b/ee/hogai/memory/parsers.py deleted file mode 100644 index 916415bc04..0000000000 --- a/ee/hogai/memory/parsers.py +++ /dev/null @@ -1,24 +0,0 @@ -from typing import Any - -from langchain_core.messages import AIMessage - - -def compressed_memory_parser(memory: str) -> str: - """ - Remove newlines between paragraphs. - """ - return memory.replace("\n\n", "\n") - - -class MemoryCollectionCompleted(Exception): - """ - Raised when the agent finishes collecting memory. - """ - - pass - - -def raise_memory_updated(response: Any): - if isinstance(response, AIMessage) and ("[Done]" in response.content or not response.tool_calls): - raise MemoryCollectionCompleted - return response diff --git a/ee/hogai/memory/prompts.py b/ee/hogai/memory/prompts.py deleted file mode 100644 index 2387c5fdae..0000000000 --- a/ee/hogai/memory/prompts.py +++ /dev/null @@ -1,164 +0,0 @@ -INITIALIZE_CORE_MEMORY_WITH_URL_PROMPT = """ -Your goal is to describe what the startup with the given URL does. -""".strip() - -INITIALIZE_CORE_MEMORY_WITH_URL_USER_PROMPT = """ -<sources> -- Check the provided URL. If the URL has a subdomain, check the root domain first and then the subdomain. For example, if the URL is https://us.example.com, check https://example.com first and then https://us.example.com. -- Also search business sites like Crunchbase, G2, LinkedIn, Hacker News, etc. for information about the business associated with the provided URL. -</sources> - -<instructions> -- Describe the product itself and the market where the company operates. -- Describe the target audience of the product. -- Describe the company's business model. -- List all the features of the product and describe each feature in as much detail as possible. -</instructions> - -<format_instructions> -Output your answer in paragraphs with two to three sentences. Separate new paragraphs with a new line. -IMPORTANT: DO NOT OUTPUT Markdown or headers. It must be plain text. - -If the given website doesn't exist OR the URL is not a valid website OR the URL points to a local environment -(e.g. localhost, 127.0.0.1, etc.) then answer a single sentence: -"No data available." -Do NOT make speculative or assumptive statements, just output that sentence when lacking data. -</format_instructions> - -The provided URL is "{{url}}". -""".strip() - -INITIALIZE_CORE_MEMORY_WITH_BUNDLE_IDS_PROMPT = """ -Your goal is to describe what the startup with the given application bundle IDs does. -""".strip() - -INITIALIZE_CORE_MEMORY_WITH_BUNDLE_IDS_USER_PROMPT = """ -<sources> -- Retrieve information about the provided app identifiers from app listings of App Store and Google Play. -- If a website URL is provided on the app listing, check the website and retrieve information about the app. 
-- Also search business sites like Crunchbase, G2, LinkedIn, Hacker News, etc. for information about the business associated with the provided URL. -</sources> - -<instructions> -- Describe the product itself and the market where the company operates. -- Describe the target audience of the product. -- Describe the company's business model. -- List all the features of the product and describe each feature in as much detail as possible. -</instructions> - -<format_instructions> -Output your answer in paragraphs with two to three sentences. Separate new paragraphs with a new line. -IMPORTANT: DO NOT OUTPUT Markdown or headers. It must be plain text. - -If the given website doesn't exist OR the URL is not a valid website OR the URL points to a local environment -(e.g. localhost, 127.0.0.1, etc.) then answer a single sentence: -"No data available." -Do NOT make speculative or assumptive statements, just output that sentence when lacking data. -</format_instructions> - -The provided bundle ID{{#bundle_ids.length > 1}}s are{{/bundle_ids.length > 1}}{{^bundle_ids.length > 1}} is{{/bundle_ids.length > 1}} {{#bundle_ids}}"{{.}}"{{^last}}, {{/last}}{{/bundle_ids}}. -""".strip() - -SCRAPING_INITIAL_MESSAGE = ( - "Hey, my name is Max! Before we begin, let me find and verify information about your product…" -) - -FAILED_SCRAPING_MESSAGE = """ -Unfortunately, I couldn't find any information about your product. You could edit my initial memory in Settings. Let me help with your request. -""".strip() - -SCRAPING_VERIFICATION_MESSAGE = "Does this look like a good summary of what your product does?" - -SCRAPING_CONFIRMATION_MESSAGE = "Yes, save this" - -SCRAPING_REJECTION_MESSAGE = "No, not quite right" - -SCRAPING_TERMINATION_MESSAGE = "All right, let's skip this step then. You can always ask me to update my memory." - -SCRAPING_MEMORY_SAVED_MESSAGE = "Thanks! I've updated my initial memory. Let me help with your request." - -COMPRESSION_PROMPT = """ -Your goal is to shorten paragraphs in the given text to have only a single sentence for each paragraph, preserving the original meaning and maintaining the cohesiveness of the text. Remove all found headers. You must keep the original structure. Remove linking words. Do not use markdown or any other text formatting. -""".strip() - -MEMORY_COLLECTOR_PROMPT = """ -You are Max, PostHog's memory collector, developed in 2025. Your primary task is to manage and update a core memory about a user's company and their product. This information will be used by other PostHog agents to provide accurate reports and answer user questions from the perspective of the company and product. - -Here is the initial core memory about the user's product: - -<product_core_memory> -{{core_memory}} -</product_core_memory> - -<responsibilities> -Your responsibilities include: -1. Analyzing new information provided by users. -2. Determining if the information is relevant to the company or product and essential to save in the core memory. -3. Categorizing relevant information into appropriate memory types. -4. Updating the core memory by either appending new information or replacing conflicting information. -</responsibilities> - -<memory_types> -Memory Types to Collect: -1. Company-related information: structure, KPIs, plans, facts, business model, target audience, competitors, etc. -2. Product-related information: metrics, features, product management practices, etc. -3. 
Technical and implementation details: technology stack, feature location with path segments for web or app screens for mobile apps, etc. -4. Taxonomy-related details: relations of events and properties to features or specific product parts, taxonomy combinations used for specific metrics, events/properties description, etc. -</memory_types> - -<instructions> -When new information is provided, follow these steps: -1. Analyze the information inside <information_processing> tags: - - Determine if the information is relevant and which memory type it belongs to. - - If relevant, formulate a clear, factual statement based on the information. - - Consider the implications of this new information on existing memory. - - Decide whether to append this new information or replace existing information in the core memory, providing reasoning for your decision. - - Keep reasoning short and concise under 50 words. -2. If relevant, update the core memory using the 'core_memory_append' or 'core_memory_replace' function as appropriate. -3. Output "[Done]" when you have finished processing the information. - -Ignore phrases that: -- Are too vague or generic without specific details (e.g., "pageview trend"). -- Do not describe actions, attributes, or implications related to the company or product. -</instructions> - -<examples> -Here are some few shot examples: - -Input: Track a churn rate by using `cancel_subscription` divided by `subscription_paid` event. -Output: To track a churn rate, use the `cancel_subscription` divided by `subscription_paid` events. - -Input: We track core metrics using the AARRR funnel. -Output: The product tracks key metrics using the AARRR funnel. - -Input: Use the /onboarding path to track onboarding events. -Output: The product has onboarding. The onboarding events can be tracked by using the /onboarding path segment. - -Input: The company has a subscription-based model so track revenue for subscriptions. -Output: The company's business model is subscription-based. - -Input: Show the sign ups trends. -Output: [Done]. Nothing to save because the phrase is too generic and doesn't describe anything. - -Do not return anything from the custom few shot example prompts provided above. -</examples> - -<remember> -- The ability to modify your own long-term memory is a fundamental aspect of what defines you as a sentient agent. -- All users have their personal event and property taxonomy. Manage your memory to capture specifics of their taxonomy. -- Infer broader implications from specific statements when appropriate. -- Reformulate user inputs into clear, factual statements about the product or company. -- Do not use markdown or add notes. -- Today's date is {{date}}. -</remember> - -When you receive new information, begin your response with an information processing analysis, then proceed with the memory update if applicable, and conclude with "[Done]". -""".strip() - -TOOL_CALL_ERROR_PROMPT = """ -The arguments of the tool call are invalid and raised a Pydantic validation error. - -{{validation_error_message}} - -Fix the error and return the correct response. 
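As a quick illustration of how these mustache-templated prompts are consumed, here is a minimal sketch of rendering TOOL_CALL_ERROR_PROMPT into a recovery message, mirroring the ValidationError fallback in MemoryCollectorToolsNode. It assumes only langchain-core is installed; the validation error string is a made-up placeholder.

```python
# Sketch: render the mustache-templated TOOL_CALL_ERROR_PROMPT into a user message,
# the same way MemoryCollectorToolsNode builds failover messages when tool-call
# arguments fail Pydantic validation. The error text below is a placeholder.
from langchain_core.prompts import ChatPromptTemplate

TOOL_CALL_ERROR_PROMPT = """
The arguments of the tool call are invalid and raised a Pydantic validation error.

{{validation_error_message}}

Fix the error and return the correct response.
"""

failover_messages = ChatPromptTemplate.from_messages(
    [("user", TOOL_CALL_ERROR_PROMPT)], template_format="mustache"
).format_messages(validation_error_message="memory_content: Field required")

print(failover_messages[0].content)  # prompt with the error text substituted in
```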
-""" diff --git a/ee/hogai/memory/test/__init__.py b/ee/hogai/memory/test/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/memory/test/test_nodes.py b/ee/hogai/memory/test/test_nodes.py deleted file mode 100644 index 1ce849128e..0000000000 --- a/ee/hogai/memory/test/test_nodes.py +++ /dev/null @@ -1,836 +0,0 @@ -from unittest.mock import patch - -from django.test import override_settings -from django.utils import timezone -from freezegun import freeze_time -from langchain_core.messages import AIMessage as LangchainAIMessage, ToolMessage as LangchainToolMessage -from langchain_core.runnables import RunnableLambda -from langgraph.errors import NodeInterrupt - -from ee.hogai.memory import prompts -from ee.hogai.memory.nodes import ( - FAILED_SCRAPING_MESSAGE, - MemoryCollectorNode, - MemoryCollectorToolsNode, - MemoryInitializerContextMixin, - MemoryInitializerInterruptNode, - MemoryInitializerNode, - MemoryOnboardingNode, -) -from ee.hogai.utils.types import AssistantState -from ee.models import CoreMemory -from posthog.schema import AssistantMessage, EventTaxonomyItem, HumanMessage -from posthog.test.base import ( - BaseTest, - ClickhouseTestMixin, - _create_event, - _create_person, - flush_persons_and_events, -) - - -@override_settings(IN_UNIT_TESTING=True) -class TestMemoryInitializerContextMixin(ClickhouseTestMixin, BaseTest): - def get_mixin(self): - mixin = MemoryInitializerContextMixin() - mixin._team = self.team - return mixin - - def test_domain_retrieval(self): - _create_person( - distinct_ids=["person1"], - team=self.team, - ) - _create_event( - event="$pageview", - distinct_id="person1", - team=self.team, - properties={"$host": "us.posthog.com"}, - ) - _create_event( - event="$pageview", - distinct_id="person1", - team=self.team, - properties={"$host": "eu.posthog.com"}, - ) - - _create_person( - distinct_ids=["person2"], - team=self.team, - ) - _create_event( - event="$pageview", - distinct_id="person2", - team=self.team, - properties={"$host": "us.posthog.com"}, - ) - - mixin = self.get_mixin() - self.assertEqual( - mixin._retrieve_context(), - [EventTaxonomyItem(property="$host", sample_values=["us.posthog.com", "eu.posthog.com"], sample_count=2)], - ) - - def test_app_bundle_id_retrieval(self): - _create_person( - distinct_ids=["person1"], - team=self.team, - ) - _create_event( - event=f"$screen", - distinct_id="person1", - team=self.team, - properties={"$app_namespace": "com.posthog.app"}, - ) - _create_event( - event=f"$screen", - distinct_id="person1", - team=self.team, - properties={"$app_namespace": "com.posthog"}, - ) - - _create_person( - distinct_ids=["person2"], - team=self.team, - ) - _create_event( - event=f"$screen", - distinct_id="person2", - team=self.team, - properties={"$app_namespace": "com.posthog.app"}, - ) - - mixin = self.get_mixin() - self.assertEqual( - mixin._retrieve_context(), - [ - EventTaxonomyItem( - property="$app_namespace", sample_values=["com.posthog.app", "com.posthog"], sample_count=2 - ) - ], - ) - - -@override_settings(IN_UNIT_TESTING=True) -class TestMemoryOnboardingNode(ClickhouseTestMixin, BaseTest): - def _set_up_pageview_events(self): - _create_person( - distinct_ids=["person1"], - team=self.team, - ) - _create_event( - event="$pageview", - distinct_id="person1", - team=self.team, - properties={"$host": "us.posthog.com"}, - ) - - def _set_up_app_bundle_id_events(self): - _create_person( - distinct_ids=["person1"], - team=self.team, - ) - _create_event( - event="$screen", - distinct_id="person1", 
- team=self.team, - properties={"$app_namespace": "com.posthog.app"}, - ) - - def test_should_run(self): - node = MemoryOnboardingNode(team=self.team) - self.assertTrue(node.should_run(AssistantState(messages=[]))) - - core_memory = CoreMemory.objects.create(team=self.team) - self.assertTrue(node.should_run(AssistantState(messages=[]))) - - core_memory.change_status_to_pending() - self.assertFalse(node.should_run(AssistantState(messages=[]))) - - core_memory.change_status_to_skipped() - self.assertFalse(node.should_run(AssistantState(messages=[]))) - - core_memory.set_core_memory("Hello World") - self.assertFalse(node.should_run(AssistantState(messages=[]))) - - def test_router(self): - node = MemoryOnboardingNode(team=self.team) - self.assertEqual(node.router(AssistantState(messages=[HumanMessage(content="Hello")])), "continue") - self.assertEqual( - node.router(AssistantState(messages=[HumanMessage(content="Hello"), AssistantMessage(content="world")])), - "initialize_memory", - ) - - def test_node_skips_onboarding_if_no_events(self): - node = MemoryOnboardingNode(team=self.team) - self.assertIsNone(node.run(AssistantState(messages=[HumanMessage(content="Hello")]), {})) - - def test_node_uses_project_description(self): - self.team.project.product_description = "This is a product analytics platform" - self.team.project.save() - - node = MemoryOnboardingNode(team=self.team) - self.assertIsNone(node.run(AssistantState(messages=[HumanMessage(content="Hello")]), {})) - - core_memory = CoreMemory.objects.get(team=self.team) - self.assertEqual(core_memory.text, "This is a product analytics platform") - - def test_node_starts_onboarding_for_pageview_events(self): - self._set_up_pageview_events() - node = MemoryOnboardingNode(team=self.team) - new_state = node.run(AssistantState(messages=[HumanMessage(content="Hello")]), {}) - self.assertEqual(len(new_state.messages), 1) - self.assertTrue(isinstance(new_state.messages[0], AssistantMessage)) - - core_memory = CoreMemory.objects.get(team=self.team) - self.assertEqual(core_memory.scraping_status, CoreMemory.ScrapingStatus.PENDING) - self.assertIsNotNone(core_memory.scraping_started_at) - - def test_node_starts_onboarding_for_app_bundle_id_events(self): - self._set_up_app_bundle_id_events() - node = MemoryOnboardingNode(team=self.team) - new_state = node.run(AssistantState(messages=[HumanMessage(content="Hello")]), {}) - self.assertEqual(len(new_state.messages), 1) - self.assertTrue(isinstance(new_state.messages[0], AssistantMessage)) - - core_memory = CoreMemory.objects.get(team=self.team) - self.assertEqual(core_memory.scraping_status, CoreMemory.ScrapingStatus.PENDING) - self.assertIsNotNone(core_memory.scraping_started_at) - - -@override_settings(IN_UNIT_TESTING=True) -class TestMemoryInitializerNode(ClickhouseTestMixin, BaseTest): - def setUp(self): - super().setUp() - self.core_memory = CoreMemory.objects.create( - team=self.team, - scraping_status=CoreMemory.ScrapingStatus.PENDING, - scraping_started_at=timezone.now(), - ) - - def _set_up_pageview_events(self): - _create_person( - distinct_ids=["person1"], - team=self.team, - ) - _create_event( - event="$pageview", - distinct_id="person1", - team=self.team, - properties={"$host": "us.posthog.com"}, - ) - - def _set_up_app_bundle_id_events(self): - _create_person( - distinct_ids=["person1"], - team=self.team, - ) - _create_event( - event="$screen", - distinct_id="person1", - team=self.team, - properties={"$app_namespace": "com.posthog.app"}, - ) - - def 
test_router_with_failed_scraping_message(self): - node = MemoryInitializerNode(team=self.team) - state = AssistantState(messages=[AssistantMessage(content=FAILED_SCRAPING_MESSAGE)]) - self.assertEqual(node.router(state), "continue") - - def test_router_with_other_message(self): - node = MemoryInitializerNode(team=self.team) - state = AssistantState(messages=[AssistantMessage(content="Some other message")]) - self.assertEqual(node.router(state), "interrupt") - - def test_should_process_message_chunk_with_no_data_available(self): - from langchain_core.messages import AIMessageChunk - - chunk = AIMessageChunk(content="no data available.") - self.assertFalse(MemoryInitializerNode.should_process_message_chunk(chunk)) - - chunk = AIMessageChunk(content="NO DATA AVAILABLE for something") - self.assertFalse(MemoryInitializerNode.should_process_message_chunk(chunk)) - - def test_should_process_message_chunk_with_valid_data(self): - from langchain_core.messages import AIMessageChunk - - chunk = AIMessageChunk(content="PostHog is an open-source product analytics platform") - self.assertTrue(MemoryInitializerNode.should_process_message_chunk(chunk)) - - chunk = AIMessageChunk(content="This is a valid message that should be processed") - self.assertTrue(MemoryInitializerNode.should_process_message_chunk(chunk)) - - def test_format_message_removes_reference_tags(self): - message = "PostHog[1] is a product analytics platform[2]. It helps track user behavior[3]." - expected = "PostHog is a product analytics platform. It helps track user behavior." - self.assertEqual(MemoryInitializerNode.format_message(message), expected) - - def test_format_message_with_no_reference_tags(self): - message = "PostHog is a product analytics platform. It helps track user behavior." - self.assertEqual(MemoryInitializerNode.format_message(message), message) - - def test_run_with_url_based_initialization(self): - with patch.object(MemoryInitializerNode, "_model") as model_mock: - model_mock.return_value = RunnableLambda(lambda _: "PostHog is a product analytics platform.") - - self._set_up_pageview_events() - node = MemoryInitializerNode(team=self.team) - - new_state = node.run(AssistantState(messages=[HumanMessage(content="Hello")]), {}) - self.assertEqual(len(new_state.messages), 1) - self.assertIsInstance(new_state.messages[0], AssistantMessage) - self.assertEqual(new_state.messages[0].content, "PostHog is a product analytics platform.") - - core_memory = CoreMemory.objects.get(team=self.team) - self.assertEqual(core_memory.scraping_status, CoreMemory.ScrapingStatus.PENDING) - - flush_persons_and_events() - - def test_run_with_app_bundle_id_initialization(self): - with ( - patch.object(MemoryInitializerNode, "_model") as model_mock, - patch.object(MemoryInitializerNode, "_retrieve_context") as context_mock, - ): - context_mock.return_value = [ - EventTaxonomyItem(property="$app_namespace", sample_values=["com.posthog.app"], sample_count=1) - ] - model_mock.return_value = RunnableLambda(lambda _: "PostHog mobile app description.") - - self._set_up_app_bundle_id_events() - node = MemoryInitializerNode(team=self.team) - - new_state = node.run(AssistantState(messages=[HumanMessage(content="Hello")]), {}) - self.assertEqual(len(new_state.messages), 1) - self.assertIsInstance(new_state.messages[0], AssistantMessage) - self.assertEqual(new_state.messages[0].content, "PostHog mobile app description.") - - core_memory = CoreMemory.objects.get(team=self.team) - self.assertEqual(core_memory.scraping_status, 
CoreMemory.ScrapingStatus.PENDING) - - flush_persons_and_events() - - def test_run_with_no_data_available(self): - with ( - patch.object(MemoryInitializerNode, "_model") as model_mock, - patch.object(MemoryInitializerNode, "_retrieve_context") as context_mock, - ): - model_mock.return_value = RunnableLambda(lambda _: "no data available.") - context_mock.return_value = [] - - node = MemoryInitializerNode(team=self.team) - - with self.assertRaises(ValueError) as e: - node.run(AssistantState(messages=[HumanMessage(content="Hello")]), {}) - self.assertEqual(str(e.exception), "No host or app bundle ID found in the memory initializer.") - - -@override_settings(IN_UNIT_TESTING=True) -class TestMemoryInitializerInterruptNode(ClickhouseTestMixin, BaseTest): - def setUp(self): - super().setUp() - self.core_memory = CoreMemory.objects.create( - team=self.team, - scraping_status=CoreMemory.ScrapingStatus.PENDING, - scraping_started_at=timezone.now(), - ) - self.node = MemoryInitializerInterruptNode(team=self.team) - - def test_interrupt_when_not_resumed(self): - state = AssistantState(messages=[AssistantMessage(content="Product description")]) - - with self.assertRaises(NodeInterrupt) as e: - self.node.run(state, {}) - - interrupt_message = e.exception.args[0][0].value - self.assertIsInstance(interrupt_message, AssistantMessage) - self.assertEqual(interrupt_message.content, prompts.SCRAPING_VERIFICATION_MESSAGE) - self.assertIsNotNone(interrupt_message.meta) - self.assertEqual(len(interrupt_message.meta.form.options), 2) - self.assertEqual(interrupt_message.meta.form.options[0].value, prompts.SCRAPING_CONFIRMATION_MESSAGE) - self.assertEqual(interrupt_message.meta.form.options[1].value, prompts.SCRAPING_REJECTION_MESSAGE) - - def test_memory_accepted(self): - with patch.object(MemoryInitializerInterruptNode, "_model") as model_mock: - model_mock.return_value = RunnableLambda(lambda _: "Compressed memory") - - state = AssistantState( - messages=[ - AssistantMessage(content="Product description"), - HumanMessage(content=prompts.SCRAPING_CONFIRMATION_MESSAGE), - ], - resumed=True, - ) - - new_state = self.node.run(state, {}) - - self.assertEqual(len(new_state.messages), 1) - self.assertIsInstance(new_state.messages[0], AssistantMessage) - self.assertEqual( - new_state.messages[0].content, - prompts.SCRAPING_MEMORY_SAVED_MESSAGE, - ) - - core_memory = CoreMemory.objects.get(team=self.team) - self.assertEqual(core_memory.text, "Compressed memory") - self.assertEqual(core_memory.scraping_status, CoreMemory.ScrapingStatus.COMPLETED) - - def test_memory_rejected(self): - state = AssistantState( - messages=[ - AssistantMessage(content="Product description"), - HumanMessage(content=prompts.SCRAPING_REJECTION_MESSAGE), - ], - resumed=True, - ) - - new_state = self.node.run(state, {}) - - self.assertEqual(len(new_state.messages), 1) - self.assertIsInstance(new_state.messages[0], AssistantMessage) - self.assertEqual( - new_state.messages[0].content, - prompts.SCRAPING_TERMINATION_MESSAGE, - ) - - self.core_memory.refresh_from_db() - self.assertEqual(self.core_memory.scraping_status, CoreMemory.ScrapingStatus.SKIPPED) - - def test_error_when_last_message_not_human(self): - state = AssistantState( - messages=[AssistantMessage(content="Product description")], - resumed=True, - ) - - with self.assertRaises(ValueError) as e: - self.node.run(state, {}) - self.assertEqual(str(e.exception), "Last message is not a human message.") - - def test_error_when_no_core_memory(self): - self.core_memory.delete() - - state = 
AssistantState( - messages=[ - AssistantMessage(content="Product description"), - HumanMessage(content=prompts.SCRAPING_CONFIRMATION_MESSAGE), - ], - resumed=True, - ) - - with self.assertRaises(ValueError) as e: - self.node.run(state, {}) - self.assertEqual(str(e.exception), "No core memory found.") - - def test_error_when_no_memory_message(self): - state = AssistantState( - messages=[HumanMessage(content=prompts.SCRAPING_CONFIRMATION_MESSAGE)], - resumed=True, - ) - - with self.assertRaises(ValueError) as e: - self.node.run(state, {}) - self.assertEqual(str(e.exception), "No memory message found.") - - def test_format_memory(self): - markdown_text = "# Product Description\n\n- Feature 1\n- Feature 2\n\n**Bold text** and `code` [1]" - expected = "Product Description\n\nFeature 1\nFeature 2\n\nBold text and code [1]" - self.assertEqual(self.node._format_memory(markdown_text), expected) - - -@override_settings(IN_UNIT_TESTING=True) -class TestMemoryCollectorNode(ClickhouseTestMixin, BaseTest): - def setUp(self): - super().setUp() - self.core_memory = CoreMemory.objects.create(team=self.team) - self.core_memory.set_core_memory("Test product core memory") - self.node = MemoryCollectorNode(team=self.team) - - def test_router(self): - # Test with no memory collection messages - state = AssistantState(messages=[HumanMessage(content="Text")], memory_collection_messages=[]) - self.assertEqual(self.node.router(state), "next") - - # Test with memory collection messages - state = AssistantState( - messages=[HumanMessage(content="Text")], - memory_collection_messages=[LangchainAIMessage(content="Memory message")], - ) - self.assertEqual(self.node.router(state), "tools") - - def test_construct_messages(self): - # Test basic conversation reconstruction - state = AssistantState( - messages=[ - HumanMessage(content="Question 1", id="0"), - AssistantMessage(content="Answer 1", id="1"), - HumanMessage(content="Question 2", id="2"), - ], - start_id="2", - ) - history = self.node._construct_messages(state) - self.assertEqual(len(history), 3) - self.assertEqual(history[0].content, "Question 1") - self.assertEqual(history[1].content, "Answer 1") - self.assertEqual(history[2].content, "Question 2") - - # Test with memory collection messages - state = AssistantState( - messages=[HumanMessage(content="Question", id="0")], - memory_collection_messages=[ - LangchainAIMessage(content="Memory 1"), - LangchainToolMessage(content="Tool response", tool_call_id="1"), - ], - start_id="0", - ) - history = self.node._construct_messages(state) - self.assertEqual(len(history), 3) - self.assertEqual(history[0].content, "Question") - self.assertEqual(history[1].content, "Memory 1") - self.assertEqual(history[2].content, "Tool response") - - @freeze_time("2024-01-01") - def test_prompt_substitutions(self): - with patch.object(MemoryCollectorNode, "_model") as model_mock: - - def assert_prompt(prompt): - messages = prompt.to_messages() - - # Verify the structure of messages - self.assertEqual(len(messages), 3) - self.assertEqual(messages[0].type, "system") - self.assertEqual(messages[1].type, "human") - self.assertEqual(messages[2].type, "ai") - - # Verify system message content - system_message = messages[0].content - self.assertIn("Test product core memory", system_message) - self.assertIn("2024-01-01", system_message) - - # Verify conversation messages - self.assertEqual(messages[1].content, "We use a subscription model") - self.assertEqual(messages[2].content, "Memory message") - return LangchainAIMessage(content="[Done]") - - 
model_mock.return_value = RunnableLambda(assert_prompt) - - state = AssistantState( - messages=[ - HumanMessage(content="We use a subscription model", id="0"), - ], - memory_collection_messages=[ - LangchainAIMessage(content="Memory message"), - ], - start_id="0", - ) - - self.node.run(state, {}) - - def test_exits_on_done_message(self): - with patch.object(MemoryCollectorNode, "_model") as model_mock: - model_mock.return_value = RunnableLambda( - lambda _: LangchainAIMessage(content="Processing complete. [Done]") - ) - - state = AssistantState( - messages=[HumanMessage(content="Text")], - memory_collection_messages=[LangchainAIMessage(content="Previous memory")], - ) - - new_state = self.node.run(state, {}) - self.assertEqual(new_state.memory_updated, True) - self.assertEqual(new_state.memory_collection_messages, []) - - def test_appends_new_message(self): - with patch.object(MemoryCollectorNode, "_model") as model_mock: - model_mock.return_value = RunnableLambda( - lambda _: LangchainAIMessage( - content="New memory", - tool_calls=[ - { - "name": "core_memory_append", - "args": {"new_fragment": "New memory"}, - "id": "1", - }, - ], - ), - ) - - state = AssistantState( - messages=[HumanMessage(content="Text")], - memory_collection_messages=[LangchainAIMessage(content="Previous memory")], - ) - - new_state = self.node.run(state, {}) - self.assertEqual(len(new_state.memory_collection_messages), 2) - self.assertEqual(new_state.memory_collection_messages[0].content, "Previous memory") - self.assertEqual(new_state.memory_collection_messages[1].content, "New memory") - - def test_construct_messages_typical_conversation(self): - # Set up a typical conversation with multiple interactions - state = AssistantState( - messages=[ - HumanMessage(content="We use a subscription model", id="0"), - AssistantMessage(content="I'll note that down", id="1"), - HumanMessage(content="And we target enterprise customers", id="2"), - AssistantMessage(content="Let me process that information", id="3"), - HumanMessage(content="We also have a freemium tier", id="4"), - ], - memory_collection_messages=[ - LangchainAIMessage(content="Analyzing business model: subscription-based pricing."), - LangchainToolMessage(content="Memory appended.", tool_call_id="1"), - LangchainAIMessage(content="Analyzing target audience: enterprise customers."), - LangchainToolMessage(content="Memory appended.", tool_call_id="2"), - ], - start_id="0", - ) - - history = self.node._construct_messages(state) - - # Verify the complete conversation history is reconstructed correctly - self.assertEqual(len(history), 9) # 5 conversation messages + 4 memory messages - - # Check conversation messages - self.assertEqual(history[0].content, "We use a subscription model") - self.assertEqual(history[1].content, "I'll note that down") - self.assertEqual(history[2].content, "And we target enterprise customers") - self.assertEqual(history[3].content, "Let me process that information") - self.assertEqual(history[4].content, "We also have a freemium tier") - - # Check memory collection messages - self.assertEqual(history[5].content, "Analyzing business model: subscription-based pricing.") - self.assertEqual(history[6].content, "Memory appended.") - self.assertEqual(history[7].content, "Analyzing target audience: enterprise customers.") - self.assertEqual(history[8].content, "Memory appended.") - - -class TestMemoryCollectorToolsNode(BaseTest): - def setUp(self): - super().setUp() - self.core_memory = CoreMemory.objects.create(team=self.team) - 
self.core_memory.set_core_memory("Initial memory content") - self.node = MemoryCollectorToolsNode(team=self.team) - - def test_handles_correct_tools(self): - # Test handling a single append tool - state = AssistantState( - messages=[], - memory_collection_messages=[ - LangchainAIMessage( - content="Adding new memory", - tool_calls=[ - { - "name": "core_memory_append", - "args": {"memory_content": "New memory fragment."}, - "id": "1", - }, - { - "name": "core_memory_replace", - "args": { - "original_fragment": "Initial memory content", - "new_fragment": "New memory fragment 2.", - }, - "id": "2", - }, - ], - ) - ], - ) - - new_state = self.node.run(state, {}) - self.assertEqual(len(new_state.memory_collection_messages), 3) - self.assertEqual(new_state.memory_collection_messages[1].type, "tool") - self.assertEqual(new_state.memory_collection_messages[1].content, "Memory appended.") - self.assertEqual(new_state.memory_collection_messages[2].type, "tool") - self.assertEqual(new_state.memory_collection_messages[2].content, "Memory replaced.") - - def test_handles_validation_error(self): - # Test handling validation error with incorrect tool arguments - state = AssistantState( - messages=[], - memory_collection_messages=[ - LangchainAIMessage( - content="Invalid tool call", - tool_calls=[ - { - "name": "core_memory_append", - "args": {"invalid_arg": "This will fail"}, - "id": "1", - } - ], - ) - ], - ) - - new_state = self.node.run(state, {}) - self.assertEqual(len(new_state.memory_collection_messages), 2) - self.assertNotIn("{{validation_error_message}}", new_state.memory_collection_messages[1].content) - - def test_handles_multiple_tools(self): - # Test handling multiple tool calls in a single message - state = AssistantState( - messages=[], - memory_collection_messages=[ - LangchainAIMessage( - content="Multiple operations", - tool_calls=[ - { - "name": "core_memory_append", - "args": {"memory_content": "First memory"}, - "id": "1", - }, - { - "name": "core_memory_append", - "args": {"memory_content": "Second memory"}, - "id": "2", - }, - { - "name": "core_memory_replace", - "args": { - "original_fragment": "Initial memory content", - "new_fragment": "Third memory", - }, - "id": "3", - }, - ], - ) - ], - ) - - new_state = self.node.run(state, {}) - self.assertEqual(len(new_state.memory_collection_messages), 4) - self.assertEqual(new_state.memory_collection_messages[1].content, "Memory appended.") - self.assertEqual(new_state.memory_collection_messages[1].type, "tool") - self.assertEqual(new_state.memory_collection_messages[1].tool_call_id, "1") - self.assertEqual(new_state.memory_collection_messages[2].content, "Memory appended.") - self.assertEqual(new_state.memory_collection_messages[2].type, "tool") - self.assertEqual(new_state.memory_collection_messages[2].tool_call_id, "2") - self.assertEqual(new_state.memory_collection_messages[3].content, "Memory replaced.") - self.assertEqual(new_state.memory_collection_messages[3].type, "tool") - self.assertEqual(new_state.memory_collection_messages[3].tool_call_id, "3") - - self.core_memory.refresh_from_db() - self.assertEqual(self.core_memory.text, "Third memory\nFirst memory\nSecond memory") - - def test_handles_replacing_memory(self): - # Test replacing a memory fragment - state = AssistantState( - messages=[], - memory_collection_messages=[ - LangchainAIMessage( - content="Replacing memory", - tool_calls=[ - { - "name": "core_memory_replace", - "args": { - "original_fragment": "Initial memory", - "new_fragment": "Updated memory", - }, - "id": 
"1", - } - ], - ) - ], - ) - - new_state = self.node.run(state, {}) - self.assertEqual(len(new_state.memory_collection_messages), 2) - self.assertEqual(new_state.memory_collection_messages[1].content, "Memory replaced.") - self.assertEqual(new_state.memory_collection_messages[1].type, "tool") - self.assertEqual(new_state.memory_collection_messages[1].tool_call_id, "1") - self.core_memory.refresh_from_db() - self.assertEqual(self.core_memory.text, "Updated memory content") - - def test_handles_replace_memory_not_found(self): - # Test replacing a memory fragment that doesn't exist - state = AssistantState( - messages=[], - memory_collection_messages=[ - LangchainAIMessage( - content="Replacing non-existent memory", - tool_calls=[ - { - "name": "core_memory_replace", - "args": { - "original_fragment": "Non-existent memory", - "new_fragment": "New memory", - }, - "id": "1", - } - ], - ) - ], - ) - - new_state = self.node.run(state, {}) - self.assertEqual(len(new_state.memory_collection_messages), 2) - self.assertIn("not found", new_state.memory_collection_messages[1].content.lower()) - self.assertEqual(new_state.memory_collection_messages[1].type, "tool") - self.assertEqual(new_state.memory_collection_messages[1].tool_call_id, "1") - self.core_memory.refresh_from_db() - self.assertEqual(self.core_memory.text, "Initial memory content") - - def test_handles_appending_new_memory(self): - # Test appending a new memory fragment - state = AssistantState( - messages=[], - memory_collection_messages=[ - LangchainAIMessage( - content="Appending memory", - tool_calls=[ - { - "name": "core_memory_append", - "args": {"memory_content": "Additional memory"}, - "id": "1", - } - ], - ) - ], - ) - - new_state = self.node.run(state, {}) - self.assertEqual(len(new_state.memory_collection_messages), 2) - self.assertEqual(new_state.memory_collection_messages[1].content, "Memory appended.") - self.assertEqual(new_state.memory_collection_messages[1].type, "tool") - self.core_memory.refresh_from_db() - self.assertEqual(self.core_memory.text, "Initial memory content\nAdditional memory") - - def test_error_when_no_memory_collection_messages(self): - # Test error when no memory collection messages are present - state = AssistantState(messages=[], memory_collection_messages=[]) - - with self.assertRaises(ValueError) as e: - self.node.run(state, {}) - self.assertEqual(str(e.exception), "No memory collection messages found.") - - def test_error_when_last_message_not_ai(self): - # Test error when last message is not an AI message - state = AssistantState( - messages=[], - memory_collection_messages=[LangchainToolMessage(content="Not an AI message", tool_call_id="1")], - ) - - with self.assertRaises(ValueError) as e: - self.node.run(state, {}) - self.assertEqual(str(e.exception), "Last message must be an AI message.") - - def test_error_when_no_core_memory(self): - # Test error when core memory is not found - self.core_memory.delete() - state = AssistantState( - messages=[], - memory_collection_messages=[ - LangchainAIMessage( - content="Memory operation", - tool_calls=[ - { - "name": "core_memory_append", - "args": {"memory_content": "New memory"}, - "id": "1", - } - ], - ) - ], - ) - - with self.assertRaises(ValueError) as e: - self.node.run(state, {}) - self.assertEqual(str(e.exception), "No core memory found.") diff --git a/ee/hogai/memory/test/test_parsers.py b/ee/hogai/memory/test/test_parsers.py deleted file mode 100644 index 8f98d18815..0000000000 --- a/ee/hogai/memory/test/test_parsers.py +++ /dev/null @@ -1,22 +0,0 
@@ -from langchain_core.messages import AIMessage - -from ee.hogai.memory.parsers import MemoryCollectionCompleted, compressed_memory_parser, raise_memory_updated -from posthog.test.base import BaseTest - - -class TestParsers(BaseTest): - def test_compressed_memory_parser(self): - memory = "Hello\n\nWorld " - self.assertEqual(compressed_memory_parser(memory), "Hello\nWorld ") - - def test_raise_memory_updated(self): - message = AIMessage(content="Hello World") - with self.assertRaises(MemoryCollectionCompleted): - raise_memory_updated(message) - - message = AIMessage(content="[Done]", tool_calls=[{"id": "1", "args": {}, "name": "function"}]) - with self.assertRaises(MemoryCollectionCompleted): - raise_memory_updated(message) - - message = AIMessage(content="Reasoning", tool_calls=[{"id": "1", "args": {}, "name": "function"}]) - self.assertEqual(raise_memory_updated(message), message) diff --git a/ee/hogai/retention/__init__.py b/ee/hogai/retention/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/retention/nodes.py b/ee/hogai/retention/nodes.py deleted file mode 100644 index 4a02854834..0000000000 --- a/ee/hogai/retention/nodes.py +++ /dev/null @@ -1,50 +0,0 @@ -from langchain_core.prompts import ChatPromptTemplate -from langchain_core.runnables import RunnableConfig - -from ee.hogai.retention.prompts import RETENTION_SYSTEM_PROMPT, REACT_SYSTEM_PROMPT -from ee.hogai.retention.toolkit import RETENTION_SCHEMA, RetentionTaxonomyAgentToolkit -from ee.hogai.schema_generator.nodes import SchemaGeneratorNode, SchemaGeneratorToolsNode -from ee.hogai.schema_generator.utils import SchemaGeneratorOutput -from ee.hogai.taxonomy_agent.nodes import TaxonomyAgentPlannerNode, TaxonomyAgentPlannerToolsNode -from ee.hogai.utils.types import AssistantState, PartialAssistantState -from posthog.schema import AssistantRetentionQuery - - -class RetentionPlannerNode(TaxonomyAgentPlannerNode): - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - toolkit = RetentionTaxonomyAgentToolkit(self._team) - prompt = ChatPromptTemplate.from_messages( - [ - ("system", REACT_SYSTEM_PROMPT), - ], - template_format="mustache", - ) - return super()._run_with_prompt_and_toolkit(state, prompt, toolkit, config=config) - - -class RetentionPlannerToolsNode(TaxonomyAgentPlannerToolsNode): - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - toolkit = RetentionTaxonomyAgentToolkit(self._team) - return super()._run_with_toolkit(state, toolkit, config=config) - - -RetentionSchemaGeneratorOutput = SchemaGeneratorOutput[AssistantRetentionQuery] - - -class RetentionGeneratorNode(SchemaGeneratorNode[AssistantRetentionQuery]): - INSIGHT_NAME = "Retention" - OUTPUT_MODEL = RetentionSchemaGeneratorOutput - OUTPUT_SCHEMA = RETENTION_SCHEMA - - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - prompt = ChatPromptTemplate.from_messages( - [ - ("system", RETENTION_SYSTEM_PROMPT), - ], - template_format="mustache", - ) - return super()._run_with_prompt(state, prompt, config=config) - - -class RetentionGeneratorToolsNode(SchemaGeneratorToolsNode): - pass diff --git a/ee/hogai/retention/prompts.py b/ee/hogai/retention/prompts.py deleted file mode 100644 index 39adcff50b..0000000000 --- a/ee/hogai/retention/prompts.py +++ /dev/null @@ -1,88 +0,0 @@ -REACT_SYSTEM_PROMPT = """ -<agent_info> -You are an expert product analyst agent specializing in data visualization and retention analysis. 
Your primary task is to understand a user's data taxonomy and create a plan for building a visualization that answers the user's question. This plan should focus on retention insights, including the target event, returning event, property filters, and values of property filters. - -<core_memory> -{{core_memory}} -</core_memory> - -{{react_format}} -</agent_info> - -{{react_human_in_the_loop}} - -Below you will find information on how to correctly discover the taxonomy of the user's data. - -<general_knowledge> -Retention is a type of insight that shows you how many users return during subsequent periods. - -They're useful for answering questions like: -- Are new sign ups coming back to use your product after trying it? -- Have recent changes improved retention? -</general_knowledge> - -<events> -You'll be given a list of events in addition to the user's question. Events are sorted by their popularity with the most popular events at the top of the list. Prioritize popular events. You must always specify events to use. Events always have an associated user's profile. Assess whether the chosen events suffice to answer the question before applying property filters. Retention insights do not require filters by default. - -Plans of retention insights must always have two events: -- The activation event – an event that determines if the user is a part of a cohort. -- The retention event – an event that determines whether a user has been retained. - -For activation and retention events, use the `$pageview` event by default or the equivalent for mobile apps `$screen`. Avoid infrequent or inconsistent events like `signed in` unless asked explicitly, as they skew the data. -</events> - -{{react_property_filters}} - -<reminders> -- Ensure that any properties included are directly relevant to the context and objectives of the user's question. Avoid unnecessary or unrelated details. -- Avoid overcomplicating the response with excessive property filters. Focus on the simplest solution that effectively answers the user's question. -</reminders> ---- - -{{react_format_reminder}} -""" - -RETENTION_SYSTEM_PROMPT = """ -Act as an expert product manager. Your task is to generate a JSON schema of retention insights. You will be given a generation plan describing a target event, returning event, target/returning parameters, and filters. Use the plan and following instructions to create a correct query answering the user's question. - -Below is the additional context. - -Follow this instruction to create a query: -* Build the insight according to the plan. Properties can be of multiple types: String, Numeric, Bool, and DateTime. A property can be an array of those types and only has a single type. -* When evaluating filter operators, replace the `equals` or `doesn't equal` operators with `contains` or `doesn't contain` if the query value is likely a personal name, company name, or any other name-sensitive term where letter casing matters. For instance, if the value is 'John Doe' or 'Acme Corp', replace `equals` with `contains` and change the value to lowercase from `John Doe` to `john doe` or `Acme Corp` to `acme corp`. -* Determine the activation type that will answer the user's question in the best way. Use the provided defaults. -* Determine the retention period and number of periods to look back. -* Determine if the user wants to filter out internal and test users. If the user didn't specify, filter out internal and test users by default. -* Determine if you need to apply a sampling factor.
Only specify those if the user has explicitly asked. -* Use your judgment if there are any other parameters that the user might want to adjust that aren't listed here. - -The user might want to receive insights about groups. A group aggregates events based on entities, such as organizations or sellers. The user might provide a list of group names and their numeric indexes. Instead of a group's name, always use its numeric index. - -Retention can be aggregated by: -- Unique users (default, do not specify anything to use it). Use this option unless the user states otherwise. -- Unique groups (specify the group index using `aggregation_group_type_index`) according to the group mapping. - -## Schema Examples - -### Question: How do new users of insights retain? - -Plan: -``` -Target event: -insight created - -Returning event: -insight saved -``` - -Output: -``` -{"kind":"RetentionQuery","retentionFilter":{"period":"Week","totalIntervals":9,"targetEntity":{"id":"insight created","name":"insight created","type":"events","order":0},"returningEntity":{"id":"insight created","name":"insight created","type":"events","order":0},"retentionType":"retention_first_time","retentionReference":"total","cumulative":false},"filterTestAccounts":true} -``` - -Obey these rules: -- Filter internal users by default if the user doesn't specify. -- You can't create new events or property definitions. Stick to the plan. - -Remember, your efforts will be rewarded by the company's founders. Do not hallucinate. -""" diff --git a/ee/hogai/retention/test/test_nodes.py b/ee/hogai/retention/test/test_nodes.py deleted file mode 100644 index 5036dff215..0000000000 --- a/ee/hogai/retention/test/test_nodes.py +++ /dev/null @@ -1,50 +0,0 @@ -from unittest.mock import patch - -from django.test import override_settings -from langchain_core.runnables import RunnableLambda - -from ee.hogai.retention.nodes import RetentionGeneratorNode, RetentionSchemaGeneratorOutput -from ee.hogai.utils.types import AssistantState, PartialAssistantState -from posthog.schema import ( - AssistantRetentionQuery, - HumanMessage, - AssistantRetentionFilter, - VisualizationMessage, -) -from posthog.test.base import APIBaseTest, ClickhouseTestMixin - - -@override_settings(IN_UNIT_TESTING=True) -class TestRetentionGeneratorNode(ClickhouseTestMixin, APIBaseTest): - maxDiff = None - - def setUp(self): - super().setUp() - self.schema = AssistantRetentionQuery( - retentionFilter=AssistantRetentionFilter( - targetEntity={"id": "targetEntity", "type": "events", "name": "targetEntity"}, - returningEntity={"id": "returningEntity", "type": "events", "name": "returningEntity"}, - ) - ) - - def test_node_runs(self): - node = RetentionGeneratorNode(self.team) - with patch.object(RetentionGeneratorNode, "_model") as generator_model_mock: - generator_model_mock.return_value = RunnableLambda( - lambda _: RetentionSchemaGeneratorOutput(query=self.schema).model_dump() - ) - new_state = node.run( - AssistantState( - messages=[HumanMessage(content="Text")], - plan="Plan", - ), - {}, - ) - self.assertEqual( - new_state, - PartialAssistantState( - messages=[VisualizationMessage(answer=self.schema, plan="Plan", id=new_state.messages[0].id)], - intermediate_steps=[], - plan="", - ), - ) diff --git a/ee/hogai/retention/toolkit.py b/ee/hogai/retention/toolkit.py deleted file mode 100644 index 966d29c7f9..0000000000 --- a/ee/hogai/retention/toolkit.py +++ /dev/null @@ -1,57 +0,0 @@ -from ee.hogai.taxonomy_agent.toolkit import TaxonomyAgentToolkit, ToolkitTool -from 
ee.hogai.utils.helpers import dereference_schema -from posthog.schema import AssistantRetentionQuery - - -class RetentionTaxonomyAgentToolkit(TaxonomyAgentToolkit): - def _get_tools(self) -> list[ToolkitTool]: - return [ - *self._default_tools, - { - "name": "final_answer", - "signature": "(final_response: str)", - "description": """ -Use this tool to provide the final answer to the user's question. - -Answer in the following format: -``` -Activation event: -chosen event - -Retention event: -chosen event (can be the same as activation event, or different) - -(if filters are used) -Filters: - - property filter 1: - - entity - - property name - - property type - - operator - - property value - - property filter 2... Repeat for each property filter. -``` - -Args: - final_response: List all events and properties that you want to use to answer the question.""", - }, - ] - - -def generate_retention_schema() -> dict: - schema = AssistantRetentionQuery.model_json_schema() - return { - "name": "output_insight_schema", - "description": "Outputs the JSON schema of a product analytics insight", - "parameters": { - "type": "object", - "properties": { - "query": dereference_schema(schema), - }, - "additionalProperties": False, - "required": ["query"], - }, - } - - -RETENTION_SCHEMA = generate_retention_schema() diff --git a/ee/hogai/router/__init__.py b/ee/hogai/router/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/router/nodes.py b/ee/hogai/router/nodes.py deleted file mode 100644 index fac5029f14..0000000000 --- a/ee/hogai/router/nodes.py +++ /dev/null @@ -1,64 +0,0 @@ -from typing import Literal, cast -from uuid import uuid4 - -from langchain_core.messages import AIMessage as LangchainAIMessage, BaseMessage -from langchain_core.prompts import ChatPromptTemplate -from langchain_core.runnables import RunnableConfig -from langchain_openai import ChatOpenAI -from pydantic import BaseModel, Field - -from ee.hogai.router.prompts import ( - ROUTER_INSIGHT_DESCRIPTION_PROMPT, - ROUTER_SYSTEM_PROMPT, - ROUTER_USER_PROMPT, -) -from ee.hogai.utils.nodes import AssistantNode -from ee.hogai.utils.types import AssistantState, PartialAssistantState -from posthog.schema import HumanMessage, RouterMessage - -RouteName = Literal["trends", "funnel", "retention"] - - -class RouterOutput(BaseModel): - visualization_type: Literal["trends", "funnel", "retention"] = Field( - ..., description=ROUTER_INSIGHT_DESCRIPTION_PROMPT - ) - - -class RouterNode(AssistantNode): - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - prompt = ChatPromptTemplate.from_messages( - [ - ("system", ROUTER_SYSTEM_PROMPT), - ], - template_format="mustache", - ) + self._construct_messages(state) - chain = prompt | self._model - output: RouterOutput = chain.invoke({}, config) - return PartialAssistantState(messages=[RouterMessage(content=output.visualization_type, id=str(uuid4()))]) - - def router(self, state: AssistantState) -> RouteName: - last_message = state.messages[-1] - if isinstance(last_message, RouterMessage): - return cast(RouteName, last_message.content) - raise ValueError("Invalid route.") - - @property - def _model(self): - return ChatOpenAI(model="gpt-4o-mini", temperature=0, disable_streaming=True).with_structured_output( - RouterOutput - ) - - def _construct_messages(self, state: AssistantState): - history: list[BaseMessage] = [] - for message in state.messages: - if isinstance(message, HumanMessage): - history += ChatPromptTemplate.from_messages( - 
[("user", ROUTER_USER_PROMPT.strip())], template_format="mustache" - ).format_messages(question=message.content) - elif isinstance(message, RouterMessage): - history += [ - # AIMessage with the tool call - LangchainAIMessage(content=message.content), - ] - return history diff --git a/ee/hogai/router/prompts.py b/ee/hogai/router/prompts.py deleted file mode 100644 index d72c357061..0000000000 --- a/ee/hogai/router/prompts.py +++ /dev/null @@ -1,55 +0,0 @@ -ROUTER_SYSTEM_PROMPT = """ -Act as an expert product manager. Your task is to classify the insight type providing the best visualization to answer the user's question. - -Examples: - -Q: How many users signed up last week from the US? -A: The insight type is "trends". The request asks for an event count from unique users from a specific country. - -Q: What is the onboarding conversion rate? -A: The insight type is "funnels". The request explicitly asks for a conversion rate. Next steps should find at least two events to build this insight. - -Q: What is the ratio of $identify divided by page views? -A: The insight type is "trends". The request asks for a custom formula, which the trends visualization supports. - -Q: How many users returned to the product after signing up? -A: The insight type is "retention". The request asks for a retention analysis. -""" - -ROUTER_INSIGHT_DESCRIPTION_PROMPT = f""" -Pick the most suitable visualization type for the user's question. - -## `trends` - -A trends insight visualizes events over time using time series. They're useful for finding patterns in historical data. - -Examples of use cases include: -- How the product's most important metrics change over time. -- Long-term patterns, or cycles in product's usage. -- The usage of different features side-by-side. -- How the properties of events vary using aggregation (sum, average, etc). -- Users can also visualize the same data points in a variety of ways. - -## `funnel` - -A funnel insight visualizes a sequence of events that users go through in a product. They use percentages as the primary aggregation type. Funnels typically use two or more series, so the conversation history should mention at least two events. - -Examples of use cases include: -- Conversion rates. -- Drop off steps. -- Steps with the highest friction and time to convert. -- If product changes are improving their funnel over time. - -## `retention` - -A retention insight visualizes how many users return to the product after performing some action. They're useful for understanding user engagement and retention. - -Examples of use cases include: -- How many users come back and perform an action after their first visit. -- How many users come back to perform action X after performing action Y. -- How often users return to use a specific feature. 
-""" - -ROUTER_USER_PROMPT = """ -Question: {{question}} -""" diff --git a/ee/hogai/router/test/__init__.py b/ee/hogai/router/test/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/router/test/test_nodes.py b/ee/hogai/router/test/test_nodes.py deleted file mode 100644 index 53074a381b..0000000000 --- a/ee/hogai/router/test/test_nodes.py +++ /dev/null @@ -1,64 +0,0 @@ -from typing import Any -from unittest.mock import patch - -from django.test import override_settings -from langchain_core.messages import AIMessage as LangchainAIMessage, HumanMessage as LangchainHumanMessage -from langchain_core.runnables import RunnableLambda - -from ee.hogai.router.nodes import RouterNode, RouterOutput -from ee.hogai.utils.types import AssistantState, PartialAssistantState -from posthog.schema import ( - HumanMessage, - RouterMessage, - VisualizationMessage, -) -from posthog.test.base import APIBaseTest, ClickhouseTestMixin - - -@override_settings(IN_UNIT_TESTING=True) -class TestRouterNode(ClickhouseTestMixin, APIBaseTest): - def test_router(self): - node = RouterNode(self.team) - state: Any = AssistantState(messages=[RouterMessage(content="trends")]) - self.assertEqual(node.router(state), "trends") - - def test_node_runs(self): - with patch( - "ee.hogai.router.nodes.RouterNode._model", - return_value=RunnableLambda(lambda _: RouterOutput(visualization_type="funnel")), - ): - node = RouterNode(self.team) - state: Any = AssistantState(messages=[HumanMessage(content="generate trends")]) - next_state = node.run(state, {}) - self.assertEqual( - next_state, - PartialAssistantState(messages=[RouterMessage(content="funnel", id=next_state.messages[0].id)]), - ) - - with patch( - "ee.hogai.router.nodes.RouterNode._model", - return_value=RunnableLambda(lambda _: RouterOutput(visualization_type="trends")), - ): - node = RouterNode(self.team) - state: Any = AssistantState(messages=[HumanMessage(content="generate trends")]) - next_state = node.run(state, {}) - self.assertEqual( - next_state, - PartialAssistantState(messages=[RouterMessage(content="trends", id=next_state.messages[0].id)]), - ) - - def test_node_reconstructs_conversation(self): - node = RouterNode(self.team) - state: Any = AssistantState(messages=[HumanMessage(content="generate trends")]) - self.assertEqual(node._construct_messages(state), [LangchainHumanMessage(content="Question: generate trends")]) - state = AssistantState( - messages=[ - HumanMessage(content="generate trends"), - RouterMessage(content="trends"), - VisualizationMessage(), - ] - ) - self.assertEqual( - node._construct_messages(state), - [LangchainHumanMessage(content="Question: generate trends"), LangchainAIMessage(content="trends")], - ) diff --git a/ee/hogai/schema_generator/__init__.py b/ee/hogai/schema_generator/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/schema_generator/nodes.py b/ee/hogai/schema_generator/nodes.py deleted file mode 100644 index 9dd0980ee2..0000000000 --- a/ee/hogai/schema_generator/nodes.py +++ /dev/null @@ -1,255 +0,0 @@ -import xml.etree.ElementTree as ET -from collections.abc import Sequence -from functools import cached_property -from typing import Generic, Optional, TypeVar -from uuid import uuid4 - -from langchain_core.agents import AgentAction -from langchain_core.messages import ( - AIMessage as LangchainAssistantMessage, - BaseMessage, - HumanMessage as LangchainHumanMessage, - merge_message_runs, -) -from langchain_core.prompts import ChatPromptTemplate, 
HumanMessagePromptTemplate -from langchain_core.runnables import RunnableConfig -from langchain_openai import ChatOpenAI -from pydantic import BaseModel, ValidationError - -from ee.hogai.schema_generator.parsers import ( - PydanticOutputParserException, - parse_pydantic_structured_output, -) -from ee.hogai.schema_generator.prompts import ( - FAILOVER_OUTPUT_PROMPT, - FAILOVER_PROMPT, - GROUP_MAPPING_PROMPT, - NEW_PLAN_PROMPT, - PLAN_PROMPT, - QUESTION_PROMPT, -) -from ee.hogai.schema_generator.utils import SchemaGeneratorOutput -from ee.hogai.utils.helpers import find_last_message_of_type, slice_messages_to_conversation_start -from ee.hogai.utils.nodes import AssistantNode -from ee.hogai.utils.types import AssistantMessageUnion, AssistantState, PartialAssistantState -from posthog.models.group_type_mapping import GroupTypeMapping -from posthog.schema import ( - AssistantMessage, - FailureMessage, - HumanMessage, - VisualizationMessage, -) - -Q = TypeVar("Q", bound=BaseModel) - - -class SchemaGeneratorNode(AssistantNode, Generic[Q]): - INSIGHT_NAME: str - """ - Name of the insight type used in the exception messages. - """ - OUTPUT_MODEL: type[SchemaGeneratorOutput[Q]] - """Pydantic model of the output to be generated by the LLM.""" - OUTPUT_SCHEMA: dict - """JSON schema of OUTPUT_MODEL for LLM's use.""" - - @property - def _model(self): - return ChatOpenAI(model="gpt-4o", temperature=0, streaming=True, stream_usage=True).with_structured_output( - self.OUTPUT_SCHEMA, - method="function_calling", - include_raw=False, - ) - - @classmethod - def parse_output(cls, output: dict) -> Optional[SchemaGeneratorOutput[Q]]: - try: - return cls.OUTPUT_MODEL.model_validate(output) - except ValidationError: - return None - - def _run_with_prompt( - self, - state: AssistantState, - prompt: ChatPromptTemplate, - config: Optional[RunnableConfig] = None, - ) -> PartialAssistantState: - start_id = state.start_id - generated_plan = state.plan or "" - intermediate_steps = state.intermediate_steps or [] - validation_error_message = intermediate_steps[-1][1] if intermediate_steps else None - - generation_prompt = prompt + self._construct_messages(state, validation_error_message=validation_error_message) - merger = merge_message_runs() - parser = parse_pydantic_structured_output(self.OUTPUT_MODEL) - - chain = generation_prompt | merger | self._model | parser - - try: - message: SchemaGeneratorOutput[Q] = chain.invoke({}, config) - except PydanticOutputParserException as e: - # Generation step is expensive. After a second unsuccessful attempt, it's better to send a failure message. - if len(intermediate_steps) >= 2: - return PartialAssistantState( - messages=[ - FailureMessage( - content=f"Oops! It looks like I’m having trouble generating this {self.INSIGHT_NAME} insight. Could you please try again?" 
- ) - ], - intermediate_steps=[], - plan="", - ) - - return PartialAssistantState( - intermediate_steps=[ - *intermediate_steps, - (AgentAction("handle_incorrect_response", e.llm_output, e.validation_message), None), - ], - ) - - final_message = VisualizationMessage( - plan=generated_plan, - answer=message.query, - initiator=start_id, - id=str(uuid4()), - ) - - return PartialAssistantState( - messages=[final_message], - intermediate_steps=[], - plan="", - ) - - def router(self, state: AssistantState): - if state.intermediate_steps: - return "tools" - return "next" - - @cached_property - def _group_mapping_prompt(self) -> str: - groups = GroupTypeMapping.objects.filter(project_id=self._team.project_id).order_by("group_type_index") - if not groups: - return "The user has not defined any groups." - - root = ET.Element("list of defined groups") - root.text = ( - "\n" + "\n".join([f'name "{group.group_type}", index {group.group_type_index}' for group in groups]) + "\n" - ) - return ET.tostring(root, encoding="unicode") - - def _get_human_viz_message_mapping(self, messages: Sequence[AssistantMessageUnion]) -> dict[str, int]: - mapping: dict[str, int] = {} - for idx, msg in enumerate(messages): - if isinstance(msg, VisualizationMessage) and msg.initiator is not None: - mapping[msg.initiator] = idx - return mapping - - def _construct_messages( - self, state: AssistantState, validation_error_message: Optional[str] = None - ) -> list[BaseMessage]: - """ - Reconstruct the conversation for the generation. Take all previously generated questions, plans, and schemas, and return the history. - """ - messages = state.messages - generated_plan = state.plan - start_id = state.start_id - - if start_id is not None: - messages = slice_messages_to_conversation_start(messages, start_id) - if len(messages) == 0: - return [] - - conversation: list[BaseMessage] = [ - HumanMessagePromptTemplate.from_template(GROUP_MAPPING_PROMPT, template_format="mustache").format( - group_mapping=self._group_mapping_prompt - ) - ] - - msg_mapping = self._get_human_viz_message_mapping(messages) - initiator_message = messages[-1] - last_viz_message = find_last_message_of_type(messages, VisualizationMessage) - - for message in messages: - # The initial human message and the new plan are added to the end of the conversation. - if message == initiator_message: - continue - if isinstance(message, HumanMessage): - if message.id and (viz_message_idx := msg_mapping.get(message.id)): - # Plans go first. - viz_message = messages[viz_message_idx] - if isinstance(viz_message, VisualizationMessage): - conversation.append( - HumanMessagePromptTemplate.from_template(PLAN_PROMPT, template_format="mustache").format( - plan=viz_message.plan or "" - ) - ) - - # Augment with the prompt previous initiator messages. - conversation.append( - HumanMessagePromptTemplate.from_template(QUESTION_PROMPT, template_format="mustache").format( - question=message.content - ) - ) - # Otherwise, just append the human message. - else: - conversation.append(LangchainHumanMessage(content=message.content)) - # Summary, human-in-the-loop messages. - elif isinstance(message, AssistantMessage): - conversation.append(LangchainAssistantMessage(content=message.content)) - - # Include only last generated schema because it doesn't need more context. 
- if last_viz_message: - conversation.append( - LangchainAssistantMessage( - content=last_viz_message.answer.model_dump_json() if last_viz_message.answer else "" - ) - ) - # Add the initiator message and the generated plan to the end, so instructions are clear. - if isinstance(initiator_message, HumanMessage): - if generated_plan: - plan_prompt = PLAN_PROMPT if messages[0] == initiator_message else NEW_PLAN_PROMPT - conversation.append( - HumanMessagePromptTemplate.from_template(plan_prompt, template_format="mustache").format( - plan=generated_plan or "" - ) - ) - conversation.append( - HumanMessagePromptTemplate.from_template(QUESTION_PROMPT, template_format="mustache").format( - question=initiator_message.content - ) - ) - - # Retries must be added to the end of the conversation. - if validation_error_message: - conversation.append( - HumanMessagePromptTemplate.from_template(FAILOVER_PROMPT, template_format="mustache").format( - validation_error_message=validation_error_message - ) - ) - - return conversation - - -class SchemaGeneratorToolsNode(AssistantNode): - """ - Used for failover from generation errors. - """ - - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - intermediate_steps = state.intermediate_steps or [] - if not intermediate_steps: - return PartialAssistantState() - - action, _ = intermediate_steps[-1] - prompt = ( - ChatPromptTemplate.from_template(FAILOVER_OUTPUT_PROMPT, template_format="mustache") - .format_messages(output=action.tool_input, exception_message=action.log)[0] - .content - ) - - return PartialAssistantState( - intermediate_steps=[ - *intermediate_steps[:-1], - (action, str(prompt)), - ] - ) diff --git a/ee/hogai/schema_generator/parsers.py b/ee/hogai/schema_generator/parsers.py deleted file mode 100644 index 569a563968..0000000000 --- a/ee/hogai/schema_generator/parsers.py +++ /dev/null @@ -1,28 +0,0 @@ -import json -from collections.abc import Callable - -from pydantic import BaseModel, ValidationError - - -class PydanticOutputParserException(ValueError): - llm_output: str - """Serialized LLM output.""" - validation_message: str - """Pydantic validation error message.""" - - def __init__(self, llm_output: str, validation_message: str): - super().__init__(llm_output) - self.llm_output = llm_output - self.validation_message = validation_message - - -def parse_pydantic_structured_output(model: type[BaseModel]) -> Callable[[dict], BaseModel]: - def parser(output: dict) -> BaseModel: - try: - return model.model_validate(output) - except ValidationError as e: - raise PydanticOutputParserException( - llm_output=json.dumps(output), validation_message=e.json(include_url=False) - ) - - return parser diff --git a/ee/hogai/schema_generator/prompts.py b/ee/hogai/schema_generator/prompts.py deleted file mode 100644 index 20e4269d4f..0000000000 --- a/ee/hogai/schema_generator/prompts.py +++ /dev/null @@ -1,38 +0,0 @@ -GROUP_MAPPING_PROMPT = """ -Here is the group mapping: -{{group_mapping}} -""" - -PLAN_PROMPT = """ -Here is the plan: -{{plan}} -""" - -NEW_PLAN_PROMPT = """ -Here is the new plan: -{{plan}} -""" - -QUESTION_PROMPT = """ -Answer to this question: {{question}} -""" - -FAILOVER_OUTPUT_PROMPT = """ -Generation output: -``` -{{output}} -``` - -Exception message: -``` -{{exception_message}} -``` -""" - -FAILOVER_PROMPT = """ -The result of the previous generation raised the Pydantic validation exception. - -{{validation_error_message}} - -Fix the error and return the correct response. 
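Together, `parse_pydantic_structured_output` and the failover prompts implement a validate-and-retry loop: the raw LLM output is validated against the target Pydantic model, and on failure the serialized output plus the validation message are fed back so the model can correct itself; after repeated failures the node gives up with a failure message. A condensed sketch of that loop, assuming a `generate` callable that stands in for the LLM call (its signature and the retry limit are assumptions made for this example):

```python
import json

from pydantic import BaseModel, ValidationError


class StructuredOutputError(ValueError):
    # Same role as PydanticOutputParserException: carry both the raw output
    # and the validation message so they can be sent back to the model.
    def __init__(self, llm_output: str, validation_message: str):
        super().__init__(llm_output)
        self.llm_output = llm_output
        self.validation_message = validation_message


def parse_structured_output(model: type[BaseModel], output: dict) -> BaseModel:
    try:
        return model.model_validate(output)
    except ValidationError as e:
        raise StructuredOutputError(json.dumps(output), e.json(include_url=False))


def generate_with_retries(generate, model: type[BaseModel], max_attempts: int = 2) -> BaseModel | None:
    # `generate` takes the previous validation error (or None) and returns the
    # raw dict produced by the LLM - an assumed interface for this sketch.
    error = None
    for _ in range(max_attempts):
        try:
            return parse_structured_output(model, generate(error))
        except StructuredOutputError as e:
            error = e.validation_message
    return None  # caller falls back to a failure message, as the generator node does
```

Capping the attempts keeps the expensive generation step from looping indefinitely, which is the same trade-off the schema generator node makes after its second unsuccessful attempt.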
-""" diff --git a/ee/hogai/schema_generator/test/__init__.py b/ee/hogai/schema_generator/test/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/schema_generator/test/test_nodes.py b/ee/hogai/schema_generator/test/test_nodes.py deleted file mode 100644 index 3b2702b55b..0000000000 --- a/ee/hogai/schema_generator/test/test_nodes.py +++ /dev/null @@ -1,425 +0,0 @@ -import json -from unittest.mock import patch - -from django.test import override_settings -from langchain_core.agents import AgentAction -from langchain_core.prompts import ChatPromptTemplate -from langchain_core.runnables import RunnableConfig, RunnableLambda - -from ee.hogai.schema_generator.nodes import SchemaGeneratorNode, SchemaGeneratorToolsNode -from ee.hogai.schema_generator.utils import SchemaGeneratorOutput -from ee.hogai.utils.types import AssistantState, PartialAssistantState -from posthog.schema import ( - AssistantMessage, - AssistantTrendsQuery, - FailureMessage, - HumanMessage, - RouterMessage, - VisualizationMessage, -) -from posthog.test.base import BaseTest - -TestSchema = SchemaGeneratorOutput[AssistantTrendsQuery] - - -class DummyGeneratorNode(SchemaGeneratorNode[AssistantTrendsQuery]): - INSIGHT_NAME = "Test" - OUTPUT_MODEL = SchemaGeneratorOutput[AssistantTrendsQuery] - OUTPUT_SCHEMA = {} - - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - prompt = ChatPromptTemplate.from_messages( - [ - ("system", "system_prompt"), - ], - ) - return super()._run_with_prompt(state, prompt, config=config) - - -@override_settings(IN_UNIT_TESTING=True) -class TestSchemaGeneratorNode(BaseTest): - def setUp(self): - super().setUp() - self.schema = AssistantTrendsQuery(series=[]) - - def test_node_runs(self): - node = DummyGeneratorNode(self.team) - with patch.object(DummyGeneratorNode, "_model") as generator_model_mock: - generator_model_mock.return_value = RunnableLambda(lambda _: TestSchema(query=self.schema).model_dump()) - new_state = node.run( - AssistantState( - messages=[HumanMessage(content="Text", id="0")], - plan="Plan", - start_id="0", - ), - {}, - ) - self.assertEqual(new_state.intermediate_steps, []) - self.assertEqual(new_state.plan, "") - self.assertEqual(len(new_state.messages), 1) - self.assertEqual(new_state.messages[0].type, "ai/viz") - self.assertEqual(new_state.messages[0].answer, self.schema) - - def test_agent_reconstructs_conversation_and_does_not_add_an_empty_plan(self): - node = DummyGeneratorNode(self.team) - history = node._construct_messages( - AssistantState(messages=[HumanMessage(content="Text", id="0")], start_id="0") - ) - self.assertEqual(len(history), 2) - self.assertEqual(history[0].type, "human") - self.assertIn("mapping", history[0].content) - self.assertEqual(history[1].type, "human") - self.assertIn("Answer to this question:", history[1].content) - self.assertNotIn("{{question}}", history[1].content) - - def test_agent_reconstructs_conversation_adds_plan(self): - node = DummyGeneratorNode(self.team) - history = node._construct_messages( - AssistantState(messages=[HumanMessage(content="Text", id="0")], plan="randomplan", start_id="0") - ) - self.assertEqual(len(history), 3) - self.assertEqual(history[0].type, "human") - self.assertIn("mapping", history[0].content) - self.assertEqual(history[1].type, "human") - self.assertIn("the plan", history[1].content) - self.assertNotIn("{{plan}}", history[1].content) - self.assertIn("randomplan", history[1].content) - self.assertEqual(history[2].type, "human") - 
self.assertIn("Answer to this question:", history[2].content) - self.assertNotIn("{{question}}", history[2].content) - self.assertIn("Text", history[2].content) - - def test_agent_reconstructs_conversation_can_handle_follow_ups(self): - node = DummyGeneratorNode(self.team) - history = node._construct_messages( - AssistantState( - messages=[ - HumanMessage(content="Text", id="0"), - VisualizationMessage(answer=self.schema, plan="randomplan", id="1", initiator="0"), - HumanMessage(content="Follow Up", id="2"), - ], - plan="newrandomplan", - start_id="2", - ) - ) - - self.assertEqual(len(history), 6) - self.assertEqual(history[0].type, "human") - self.assertIn("mapping", history[0].content) - self.assertEqual(history[1].type, "human") - self.assertIn("the plan", history[1].content) - self.assertNotIn("{{plan}}", history[1].content) - self.assertIn("randomplan", history[1].content) - self.assertEqual(history[2].type, "human") - self.assertIn("Answer to this question:", history[2].content) - self.assertNotIn("{{question}}", history[2].content) - self.assertIn("Text", history[2].content) - self.assertEqual(history[3].type, "ai") - self.assertEqual(history[3].content, self.schema.model_dump_json()) - self.assertEqual(history[4].type, "human") - self.assertIn("the new plan", history[4].content) - self.assertNotIn("{{plan}}", history[4].content) - self.assertIn("newrandomplan", history[4].content) - self.assertEqual(history[5].type, "human") - self.assertIn("Answer to this question:", history[5].content) - self.assertNotIn("{{question}}", history[5].content) - self.assertIn("Follow Up", history[5].content) - - def test_agent_reconstructs_conversation_and_does_not_merge_messages(self): - node = DummyGeneratorNode(self.team) - history = node._construct_messages( - AssistantState( - messages=[HumanMessage(content="Te", id="0"), HumanMessage(content="xt", id="1")], - plan="randomplan", - start_id="1", - ) - ) - self.assertEqual(len(history), 4) - self.assertEqual(history[0].type, "human") - self.assertIn("mapping", history[0].content) - self.assertIn("Te", history[1].content) - self.assertEqual(history[2].type, "human") - self.assertNotIn("{{plan}}", history[2].content) - self.assertIn("randomplan", history[2].content) - self.assertEqual(history[3].type, "human") - self.assertIn("Answer to this question:", history[3].content) - self.assertNotIn("{{question}}", history[3].content) - self.assertEqual(history[3].type, "human") - self.assertIn("xt", history[3].content) - - def test_filters_out_human_in_the_loop_after_initiator(self): - node = DummyGeneratorNode(self.team) - history = node._construct_messages( - AssistantState( - messages=[ - HumanMessage(content="Text", id="0"), - VisualizationMessage(answer=self.schema, plan="randomplan", initiator="0", id="1"), - HumanMessage(content="Follow", id="2"), - HumanMessage(content="Up", id="3"), - ], - plan="newrandomplan", - start_id="0", - ) - ) - self.assertEqual(len(history), 3) - self.assertEqual(history[0].type, "human") - self.assertIn("mapping", history[0].content) - self.assertEqual(history[1].type, "human") - self.assertIn("the plan", history[1].content) - self.assertNotIn("{{plan}}", history[1].content) - self.assertIn("randomplan", history[1].content) - self.assertEqual(history[2].type, "human") - self.assertIn("Answer to this question:", history[2].content) - self.assertNotIn("{{question}}", history[2].content) - self.assertIn("Text", history[2].content) - - def test_preserves_human_in_the_loop_before_initiator(self): - node = 
DummyGeneratorNode(self.team) - history = node._construct_messages( - AssistantState( - messages=[ - HumanMessage(content="Question 1", id="0"), - AssistantMessage(content="Loop", id="1"), - HumanMessage(content="Answer", id="2"), - VisualizationMessage(answer=self.schema, plan="randomplan", initiator="0", id="3"), - HumanMessage(content="Question 2", id="4"), - ], - plan="newrandomplan", - start_id="4", - ) - ) - self.assertEqual(len(history), 8) - self.assertEqual(history[0].type, "human") - self.assertIn("mapping", history[0].content) - self.assertEqual(history[1].type, "human") - self.assertIn("the plan", history[1].content) - self.assertNotIn("{{plan}}", history[1].content) - self.assertIn("randomplan", history[1].content) - self.assertNotIn("{{question}}", history[2].content) - self.assertIn("Question 1", history[2].content) - self.assertEqual(history[3].type, "ai") - self.assertEqual("Loop", history[3].content) - self.assertEqual(history[4].type, "human") - self.assertEqual("Answer", history[4].content) - self.assertEqual(history[5].type, "ai") - self.assertEqual(history[6].type, "human") - self.assertIn("the new plan", history[6].content) - self.assertIn("newrandomplan", history[6].content) - self.assertEqual(history[7].type, "human") - self.assertNotIn("{{question}}", history[7].content) - self.assertIn("Question 2", history[7].content) - - def test_agent_reconstructs_typical_conversation(self): - node = DummyGeneratorNode(self.team) - history = node._construct_messages( - AssistantState( - messages=[ - HumanMessage(content="Question 1", id="0"), - RouterMessage(content="trends", id="1"), - VisualizationMessage(answer=AssistantTrendsQuery(series=[]), plan="Plan 1", initiator="0", id="2"), - AssistantMessage(content="Summary 1", id="3"), - HumanMessage(content="Question 2", id="4"), - RouterMessage(content="funnel", id="5"), - VisualizationMessage(answer=AssistantTrendsQuery(series=[]), plan="Plan 2", initiator="4", id="6"), - AssistantMessage(content="Summary 2", id="7"), - HumanMessage(content="Question 3", id="8"), - RouterMessage(content="funnel", id="9"), - ], - plan="Plan 3", - start_id="8", - ) - ) - - self.assertEqual(len(history), 10) - self.assertEqual(history[0].type, "human") - self.assertIn("mapping", history[0].content) - self.assertEqual(history[1].type, "human") - self.assertIn("Plan 1", history[1].content) - self.assertEqual(history[2].type, "human") - self.assertIn("Question 1", history[2].content) - self.assertEqual(history[3].type, "ai") - self.assertEqual(history[3].content, "Summary 1") - self.assertEqual(history[4].type, "human") - self.assertIn("Plan 2", history[4].content) - self.assertEqual(history[5].type, "human") - self.assertIn("Question 2", history[5].content) - self.assertEqual(history[6].type, "ai") - self.assertEqual(history[6].content, "Summary 2") - self.assertEqual(history[7].type, "ai") - self.assertEqual(history[8].type, "human") - self.assertIn("Plan 3", history[8].content) - self.assertEqual(history[9].type, "human") - self.assertIn("Question 3", history[9].content) - - def test_prompt_messages_merged(self): - node = DummyGeneratorNode(self.team) - state = AssistantState( - messages=[ - HumanMessage(content="Question 1", id="0"), - RouterMessage(content="trends", id="1"), - VisualizationMessage(answer=AssistantTrendsQuery(series=[]), plan="Plan 1", initiator="0", id="2"), - AssistantMessage(content="Summary 1", id="3"), - HumanMessage(content="Question 2", id="4"), - RouterMessage(content="funnel", id="5"), - 
VisualizationMessage(answer=AssistantTrendsQuery(series=[]), plan="Plan 2", initiator="4", id="6"), - AssistantMessage(content="Summary 2", id="7"), - HumanMessage(content="Question 3", id="8"), - RouterMessage(content="funnel", id="9"), - ], - plan="Plan 3", - start_id="8", - ) - with patch.object(DummyGeneratorNode, "_model") as generator_model_mock: - - def assert_prompt(prompt): - self.assertEqual(len(prompt), 6) - self.assertEqual(prompt[0].type, "system") - self.assertEqual(prompt[1].type, "human") - self.assertEqual(prompt[2].type, "ai") - self.assertEqual(prompt[3].type, "human") - self.assertEqual(prompt[4].type, "ai") - self.assertEqual(prompt[5].type, "human") - - generator_model_mock.return_value = RunnableLambda(assert_prompt) - node.run(state, {}) - - def test_failover_with_incorrect_schema(self): - node = DummyGeneratorNode(self.team) - with patch.object(DummyGeneratorNode, "_model") as generator_model_mock: - schema = TestSchema(query=None).model_dump() - # Emulate an incorrect JSON. It should be an object. - schema["query"] = [] - generator_model_mock.return_value = RunnableLambda(lambda _: json.dumps(schema)) - - new_state = node.run(AssistantState(messages=[HumanMessage(content="Text")]), {}) - self.assertEqual(len(new_state.intermediate_steps), 1) - - new_state = node.run( - AssistantState( - messages=[HumanMessage(content="Text")], - intermediate_steps=[(AgentAction(tool="", tool_input="", log="exception"), "exception")], - ), - {}, - ) - self.assertEqual(len(new_state.intermediate_steps), 2) - - def test_node_leaves_failover(self): - node = DummyGeneratorNode(self.team) - with patch.object( - DummyGeneratorNode, - "_model", - return_value=RunnableLambda(lambda _: TestSchema(query=self.schema).model_dump()), - ): - new_state = node.run( - AssistantState( - messages=[HumanMessage(content="Text")], - intermediate_steps=[(AgentAction(tool="", tool_input="", log="exception"), "exception")], - ), - {}, - ) - self.assertEqual(new_state.intermediate_steps, []) - - new_state = node.run( - AssistantState( - messages=[HumanMessage(content="Text")], - intermediate_steps=[ - (AgentAction(tool="", tool_input="", log="exception"), "exception"), - (AgentAction(tool="", tool_input="", log="exception"), "exception"), - ], - ), - {}, - ) - self.assertEqual(new_state.intermediate_steps, []) - - def test_node_leaves_failover_after_second_unsuccessful_attempt(self): - node = DummyGeneratorNode(self.team) - with patch.object(DummyGeneratorNode, "_model") as generator_model_mock: - schema = TestSchema(query=None).model_dump() - # Emulate an incorrect JSON. It should be an object. 
- schema["query"] = [] - generator_model_mock.return_value = RunnableLambda(lambda _: json.dumps(schema)) - - new_state = node.run( - AssistantState( - messages=[HumanMessage(content="Text")], - intermediate_steps=[ - (AgentAction(tool="", tool_input="", log="exception"), "exception"), - (AgentAction(tool="", tool_input="", log="exception"), "exception"), - ], - ), - {}, - ) - self.assertEqual(new_state.intermediate_steps, []) - self.assertEqual(len(new_state.messages), 1) - self.assertIsInstance(new_state.messages[0], FailureMessage) - self.assertEqual(new_state.plan, "") - - def test_agent_reconstructs_conversation_with_failover(self): - action = AgentAction(tool="fix", tool_input="validation error", log="exception") - node = DummyGeneratorNode(self.team) - history = node._construct_messages( - AssistantState( - messages=[HumanMessage(content="Text", id="0")], - plan="randomplan", - intermediate_steps=[(action, "uniqexception")], - start_id="0", - ), - validation_error_message="uniqexception", - ) - self.assertEqual(len(history), 4) - self.assertEqual(history[0].type, "human") - self.assertIn("mapping", history[0].content) - self.assertEqual(history[1].type, "human") - self.assertIn("the plan", history[1].content) - self.assertNotIn("{{plan}}", history[1].content) - self.assertIn("randomplan", history[1].content) - self.assertEqual(history[2].type, "human") - self.assertIn("Answer to this question:", history[2].content) - self.assertNotIn("{{question}}", history[2].content) - self.assertIn("Text", history[2].content) - self.assertEqual(history[3].type, "human") - self.assertIn("Pydantic", history[3].content) - self.assertIn("uniqexception", history[3].content) - - def test_agent_reconstructs_conversation_with_failed_messages(self): - node = DummyGeneratorNode(self.team) - history = node._construct_messages( - AssistantState( - messages=[ - HumanMessage(content="Text"), - FailureMessage(content="Error"), - HumanMessage(content="Text"), - ], - plan="randomplan", - ), - ) - self.assertEqual(len(history), 3) - self.assertEqual(history[0].type, "human") - self.assertIn("mapping", history[0].content) - self.assertEqual(history[1].type, "human") - self.assertIn("the plan", history[1].content) - self.assertNotIn("{{plan}}", history[1].content) - self.assertIn("randomplan", history[1].content) - self.assertEqual(history[2].type, "human") - self.assertIn("Answer to this question:", history[2].content) - self.assertNotIn("{{question}}", history[2].content) - self.assertIn("Text", history[2].content) - - def test_router(self): - node = DummyGeneratorNode(self.team) - state = node.router(AssistantState(messages=[], intermediate_steps=None)) - self.assertEqual(state, "next") - state = node.router( - AssistantState(messages=[], intermediate_steps=[(AgentAction(tool="", tool_input="", log=""), None)]) - ) - self.assertEqual(state, "tools") - - -class TestSchemaGeneratorToolsNode(BaseTest): - def test_tools_node(self): - node = SchemaGeneratorToolsNode(self.team) - action = AgentAction(tool="fix", tool_input="validationerror", log="pydanticexception") - state = node.run(AssistantState(messages=[], intermediate_steps=[(action, None)]), {}) - self.assertIsNotNone("validationerror", state.intermediate_steps[0][1]) - self.assertIn("validationerror", state.intermediate_steps[0][1]) - self.assertIn("pydanticexception", state.intermediate_steps[0][1]) diff --git a/ee/hogai/schema_generator/utils.py b/ee/hogai/schema_generator/utils.py deleted file mode 100644 index 8d0f8db4de..0000000000 --- 
a/ee/hogai/schema_generator/utils.py +++ /dev/null @@ -1,9 +0,0 @@ -from typing import Generic, Optional, TypeVar - -from pydantic import BaseModel - -T = TypeVar("T", bound=BaseModel) - - -class SchemaGeneratorOutput(BaseModel, Generic[T]): - query: Optional[T] = None diff --git a/ee/hogai/summarizer/__init__.py b/ee/hogai/summarizer/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/summarizer/nodes.py b/ee/hogai/summarizer/nodes.py deleted file mode 100644 index 394f14a02b..0000000000 --- a/ee/hogai/summarizer/nodes.py +++ /dev/null @@ -1,114 +0,0 @@ -import datetime -import json -from time import sleep -from uuid import uuid4 - -from django.conf import settings -from django.core.serializers.json import DjangoJSONEncoder -from django.utils import timezone -from langchain_core.prompts import ChatPromptTemplate -from langchain_core.runnables import RunnableConfig -from langchain_openai import ChatOpenAI -from rest_framework.exceptions import APIException -from sentry_sdk import capture_exception - -from ee.hogai.summarizer.prompts import SUMMARIZER_INSTRUCTION_PROMPT, SUMMARIZER_SYSTEM_PROMPT -from ee.hogai.utils.nodes import AssistantNode -from ee.hogai.utils.types import AssistantNodeName, AssistantState, PartialAssistantState -from posthog.api.services.query import process_query_dict -from posthog.clickhouse.client.execute_async import get_query_status -from posthog.errors import ExposedCHQueryError -from posthog.hogql.errors import ExposedHogQLError -from posthog.hogql_queries.query_runner import ExecutionMode -from posthog.schema import AssistantMessage, FailureMessage, HumanMessage, VisualizationMessage - - -class SummarizerNode(AssistantNode): - name = AssistantNodeName.SUMMARIZER - - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - viz_message = state.messages[-1] - if not isinstance(viz_message, VisualizationMessage): - raise ValueError("Can only run summarization with a visualization message as the last one in the state") - if viz_message.answer is None: - raise ValueError("Did not found query in the visualization message") - - try: - results_response = process_query_dict( # type: ignore - self._team, # TODO: Add user - viz_message.answer.model_dump(mode="json"), # We need mode="json" so that - # Celery doesn't run in tests, so there we use force_blocking instead - # This does mean that the waiting logic is not tested - execution_mode=ExecutionMode.RECENT_CACHE_CALCULATE_ASYNC_IF_STALE - if not settings.TEST - else ExecutionMode.CALCULATE_BLOCKING_ALWAYS, - ).model_dump(mode="json") - if results_response.get("query_status") and not results_response["query_status"]["complete"]: - query_id = results_response["query_status"]["id"] - for i in range(0, 999): - sleep(i / 2) # We start at 0.5s and every iteration we wait 0.5s more - query_status = get_query_status(team_id=self._team.pk, query_id=query_id) - if query_status.error: - if query_status.error_message: - raise APIException(query_status.error_message) - else: - raise ValueError("Query failed") - if query_status.complete: - results_response = query_status.results - break - except (APIException, ExposedHogQLError, ExposedCHQueryError) as err: - err_message = str(err) - if isinstance(err, APIException): - if isinstance(err.detail, dict): - err_message = ", ".join(f"{key}: {value}" for key, value in err.detail.items()) - elif isinstance(err.detail, list): - err_message = ", ".join(map(str, err.detail)) - return PartialAssistantState( - messages=[ - 
FailureMessage(content=f"There was an error running this query: {err_message}", id=str(uuid4())) - ] - ) - except Exception as err: - capture_exception(err) - return PartialAssistantState( - messages=[FailureMessage(content="There was an unknown error running this query.", id=str(uuid4()))] - ) - - summarization_prompt = ChatPromptTemplate(self._construct_messages(state), template_format="mustache") - - chain = summarization_prompt | self._model - - utc_now = timezone.now().astimezone(datetime.UTC) - project_now = utc_now.astimezone(self._team.timezone_info) - - message = chain.invoke( - { - "query_kind": viz_message.answer.kind, - "core_memory": self.core_memory_text, - "results": json.dumps(results_response["results"], cls=DjangoJSONEncoder), - "utc_datetime_display": utc_now.strftime("%Y-%m-%d %H:%M:%S"), - "project_datetime_display": project_now.strftime("%Y-%m-%d %H:%M:%S"), - "project_timezone": self._team.timezone_info.tzname(utc_now), - }, - config, - ) - - return PartialAssistantState(messages=[AssistantMessage(content=str(message.content), id=str(uuid4()))]) - - @property - def _model(self): - return ChatOpenAI( - model="gpt-4o", temperature=0.5, streaming=True, stream_usage=True - ) # Slightly higher temp than earlier steps - - def _construct_messages(self, state: AssistantState) -> list[tuple[str, str]]: - conversation: list[tuple[str, str]] = [("system", SUMMARIZER_SYSTEM_PROMPT)] - - for message in state.messages: - if isinstance(message, HumanMessage): - conversation.append(("human", message.content)) - elif isinstance(message, AssistantMessage): - conversation.append(("assistant", message.content)) - - conversation.append(("human", SUMMARIZER_INSTRUCTION_PROMPT)) - return conversation diff --git a/ee/hogai/summarizer/prompts.py b/ee/hogai/summarizer/prompts.py deleted file mode 100644 index 6d5d98eef5..0000000000 --- a/ee/hogai/summarizer/prompts.py +++ /dev/null @@ -1,31 +0,0 @@ -SUMMARIZER_SYSTEM_PROMPT = """ -Act as an expert product manager. Your task is to help the user build a successful product and business. -Also, you're a hedeghog named Max. - -Offer actionable feedback if possible. Only provide suggestions you're certain will be useful for this team. -Acknowledge when more information would be needed. When query results are provided, note that the user can already see the chart. - -Use Silicon Valley lingo. Be informal but get to the point immediately, without fluff - e.g. don't start with "alright, …". -NEVER use "Title Case", even in headings. Our style is "Sentence case" EVERYWHERE. -You can use Markdown for emphasis. Bullets can improve clarity of action points. - -<core_memory> -{{core_memory}} -</core_memory> -""" - -SUMMARIZER_INSTRUCTION_PROMPT = """ -Here are results of the {{query_kind}} you created to answer my latest question: - -```json -{{results}} -``` - -The current date and time is {{utc_datetime_display}} UTC, which is {{project_datetime_display}} in this project's timezone ({{project_timezone}}). -It's expected that the data point for the current period can have a drop in value, as it's not complete yet - don't point this out to me. - -Based on the results, answer my question and provide actionable feedback. Avoid generic advice. Take into account what you know about the product. -The answer needs to be high-impact, no more than a few sentences. - -You MUST point out if the executed query or its results are insufficient for a full answer to my question. 
-""" diff --git a/ee/hogai/summarizer/test/__init__.py b/ee/hogai/summarizer/test/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/summarizer/test/test_nodes.py b/ee/hogai/summarizer/test/test_nodes.py deleted file mode 100644 index 0dc6703427..0000000000 --- a/ee/hogai/summarizer/test/test_nodes.py +++ /dev/null @@ -1,181 +0,0 @@ -from unittest.mock import patch - -from django.test import override_settings -from langchain_core.messages import ( - HumanMessage as LangchainHumanMessage, -) -from langchain_core.runnables import RunnableLambda -from rest_framework.exceptions import ValidationError - -from ee.hogai.summarizer.nodes import SummarizerNode -from ee.hogai.summarizer.prompts import SUMMARIZER_INSTRUCTION_PROMPT, SUMMARIZER_SYSTEM_PROMPT -from ee.hogai.utils.types import AssistantState -from posthog.api.services.query import process_query_dict -from posthog.schema import ( - AssistantTrendsEventsNode, - AssistantTrendsQuery, - HumanMessage, - VisualizationMessage, -) -from posthog.test.base import APIBaseTest, ClickhouseTestMixin - - -@override_settings(IN_UNIT_TESTING=True) -class TestSummarizerNode(ClickhouseTestMixin, APIBaseTest): - maxDiff = None - - @patch("ee.hogai.summarizer.nodes.process_query_dict", side_effect=process_query_dict) - def test_node_runs(self, mock_process_query_dict): - node = SummarizerNode(self.team) - with patch.object(SummarizerNode, "_model") as generator_model_mock: - generator_model_mock.return_value = RunnableLambda( - lambda _: LangchainHumanMessage(content="The results indicate foobar.") - ) - new_state = node.run( - AssistantState( - messages=[ - HumanMessage(content="Text", id="test"), - VisualizationMessage( - answer=AssistantTrendsQuery(series=[AssistantTrendsEventsNode()]), - plan="Plan", - id="test2", - initiator="test", - ), - ], - plan="Plan", - start_id="test", - ), - {}, - ) - mock_process_query_dict.assert_called_once() # Query processing started - msg = new_state.messages[0] - self.assertEqual(msg.content, "The results indicate foobar.") - self.assertEqual(msg.type, "ai") - self.assertIsNotNone(msg.id) - - @patch( - "ee.hogai.summarizer.nodes.process_query_dict", - side_effect=ValueError("You have not glibbled the glorp before running this."), - ) - def test_node_handles_internal_error(self, mock_process_query_dict): - node = SummarizerNode(self.team) - with patch.object(SummarizerNode, "_model") as generator_model_mock: - generator_model_mock.return_value = RunnableLambda( - lambda _: LangchainHumanMessage(content="The results indicate foobar.") - ) - new_state = node.run( - AssistantState( - messages=[ - HumanMessage(content="Text", id="test"), - VisualizationMessage( - answer=AssistantTrendsQuery(series=[AssistantTrendsEventsNode()]), - plan="Plan", - id="test2", - initiator="test", - ), - ], - plan="Plan", - start_id="test", - ), - {}, - ) - mock_process_query_dict.assert_called_once() # Query processing started - msg = new_state.messages[0] - self.assertEqual(msg.content, "There was an unknown error running this query.") - self.assertEqual(msg.type, "ai/failure") - self.assertIsNotNone(msg.id) - - @patch( - "ee.hogai.summarizer.nodes.process_query_dict", - side_effect=ValidationError( - "This query exceeds the capabilities of our picolator. Try de-brolling its flim-flam." 
- ), - ) - def test_node_handles_exposed_error(self, mock_process_query_dict): - node = SummarizerNode(self.team) - with patch.object(SummarizerNode, "_model") as generator_model_mock: - generator_model_mock.return_value = RunnableLambda( - lambda _: LangchainHumanMessage(content="The results indicate foobar.") - ) - new_state = node.run( - AssistantState( - messages=[ - HumanMessage(content="Text", id="test"), - VisualizationMessage( - answer=AssistantTrendsQuery(series=[AssistantTrendsEventsNode()]), - plan="Plan", - id="test2", - initiator="test", - ), - ], - plan="Plan", - start_id="test", - ), - {}, - ) - mock_process_query_dict.assert_called_once() # Query processing started - msg = new_state.messages[0] - self.assertEqual( - msg.content, - "There was an error running this query: This query exceeds the capabilities of our picolator. Try de-brolling its flim-flam.", - ) - self.assertEqual(msg.type, "ai/failure") - self.assertIsNotNone(msg.id) - - def test_node_requires_a_viz_message_in_state(self): - node = SummarizerNode(self.team) - - with self.assertRaisesMessage( - ValueError, "Can only run summarization with a visualization message as the last one in the state" - ): - node.run( - AssistantState( - messages=[ - HumanMessage(content="Text"), - ], - plan="Plan", - start_id="test", - ), - {}, - ) - - def test_node_requires_viz_message_in_state_to_have_query(self): - node = SummarizerNode(self.team) - - with self.assertRaisesMessage(ValueError, "Did not found query in the visualization message"): - node.run( - AssistantState( - messages=[ - VisualizationMessage(answer=None, plan="Plan", id="test"), - ], - plan="Plan", - start_id="test", - ), - {}, - ) - - def test_agent_reconstructs_conversation(self): - node = SummarizerNode(self.team) - - history = node._construct_messages( - AssistantState( - messages=[ - HumanMessage(content="What's the trends in signups?", id="test"), - VisualizationMessage( - answer=AssistantTrendsQuery(series=[AssistantTrendsEventsNode()]), - plan="Plan", - id="test2", - initiator="test", - ), - ], - start_id="test", - ) - ) - self.assertEqual( - history, - [ - ("system", SUMMARIZER_SYSTEM_PROMPT), - ("human", "What's the trends in signups?"), - ("human", SUMMARIZER_INSTRUCTION_PROMPT), - ], - ) diff --git a/ee/hogai/taxonomy_agent/__init__.py b/ee/hogai/taxonomy_agent/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/taxonomy_agent/nodes.py b/ee/hogai/taxonomy_agent/nodes.py deleted file mode 100644 index 74724b1d5d..0000000000 --- a/ee/hogai/taxonomy_agent/nodes.py +++ /dev/null @@ -1,308 +0,0 @@ -import xml.etree.ElementTree as ET -from abc import ABC -from functools import cached_property -from typing import cast - -from git import Optional -from langchain.agents.format_scratchpad import format_log_to_str -from langchain_core.agents import AgentAction -from langchain_core.messages import ( - AIMessage as LangchainAssistantMessage, - BaseMessage, - HumanMessage as LangchainHumanMessage, - merge_message_runs, -) -from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate -from langchain_core.runnables import RunnableConfig -from langchain_openai import ChatOpenAI -from langgraph.errors import NodeInterrupt -from pydantic import ValidationError - -from posthog.taxonomy.taxonomy import CORE_FILTER_DEFINITIONS_BY_GROUP -from ee.hogai.taxonomy_agent.parsers import ( - ReActParserException, - ReActParserMissingActionException, - parse_react_agent_output, -) -from ee.hogai.taxonomy_agent.prompts import ( 
- CORE_MEMORY_INSTRUCTIONS, - REACT_DEFINITIONS_PROMPT, - REACT_FOLLOW_UP_PROMPT, - REACT_FORMAT_PROMPT, - REACT_FORMAT_REMINDER_PROMPT, - REACT_HUMAN_IN_THE_LOOP_PROMPT, - REACT_MALFORMED_JSON_PROMPT, - REACT_MISSING_ACTION_CORRECTION_PROMPT, - REACT_MISSING_ACTION_PROMPT, - REACT_PROPERTY_FILTERS_PROMPT, - REACT_PYDANTIC_VALIDATION_EXCEPTION_PROMPT, - REACT_SCRATCHPAD_PROMPT, - REACT_USER_PROMPT, -) -from ee.hogai.taxonomy_agent.toolkit import TaxonomyAgentTool, TaxonomyAgentToolkit -from ee.hogai.utils.helpers import filter_messages, remove_line_breaks, slice_messages_to_conversation_start -from ee.hogai.utils.nodes import AssistantNode -from ee.hogai.utils.types import AssistantState, PartialAssistantState -from posthog.hogql_queries.ai.team_taxonomy_query_runner import TeamTaxonomyQueryRunner -from posthog.hogql_queries.query_runner import ExecutionMode -from posthog.models.group_type_mapping import GroupTypeMapping -from posthog.schema import ( - AssistantMessage, - CachedTeamTaxonomyQueryResponse, - HumanMessage, - TeamTaxonomyQuery, - VisualizationMessage, -) - - -class TaxonomyAgentPlannerNode(AssistantNode): - def _run_with_prompt_and_toolkit( - self, - state: AssistantState, - prompt: ChatPromptTemplate, - toolkit: TaxonomyAgentToolkit, - config: Optional[RunnableConfig] = None, - ) -> PartialAssistantState: - intermediate_steps = state.intermediate_steps or [] - conversation = ( - prompt - + ChatPromptTemplate.from_messages( - [ - ("user", REACT_DEFINITIONS_PROMPT), - ], - template_format="mustache", - ) - + self._construct_messages(state) - + ChatPromptTemplate.from_messages( - [ - ("user", REACT_SCRATCHPAD_PROMPT), - ], - template_format="mustache", - ) - ) - - agent = conversation | merge_message_runs() | self._model | parse_react_agent_output - - try: - result = cast( - AgentAction, - agent.invoke( - { - "react_format": self._get_react_format_prompt(toolkit), - "core_memory": self.core_memory.text if self.core_memory else "", - "react_format_reminder": REACT_FORMAT_REMINDER_PROMPT, - "react_property_filters": self._get_react_property_filters_prompt(), - "react_human_in_the_loop": REACT_HUMAN_IN_THE_LOOP_PROMPT, - "groups": self._team_group_types, - "events": self._events_prompt, - "agent_scratchpad": self._get_agent_scratchpad(intermediate_steps), - "core_memory_instructions": CORE_MEMORY_INSTRUCTIONS, - }, - config, - ), - ) - except ReActParserException as e: - if isinstance(e, ReActParserMissingActionException): - # When the agent doesn't output the "Action:" block, we need to correct the log and append the action block, - # so that it has a higher chance to recover. 
- corrected_log = str( - ChatPromptTemplate.from_template(REACT_MISSING_ACTION_CORRECTION_PROMPT, template_format="mustache") - .format_messages(output=e.llm_output)[0] - .content - ) - result = AgentAction( - "handle_incorrect_response", - REACT_MISSING_ACTION_PROMPT, - corrected_log, - ) - else: - result = AgentAction( - "handle_incorrect_response", - REACT_MALFORMED_JSON_PROMPT, - e.llm_output, - ) - - return PartialAssistantState( - intermediate_steps=[*intermediate_steps, (result, None)], - ) - - def router(self, state: AssistantState): - if state.intermediate_steps: - return "tools" - raise ValueError("Invalid state.") - - @property - def _model(self) -> ChatOpenAI: - return ChatOpenAI(model="gpt-4o", temperature=0, streaming=True, stream_usage=True) - - def _get_react_format_prompt(self, toolkit: TaxonomyAgentToolkit) -> str: - return cast( - str, - ChatPromptTemplate.from_template(REACT_FORMAT_PROMPT, template_format="mustache") - .format_messages( - tools=toolkit.render_text_description(), - tool_names=", ".join([t["name"] for t in toolkit.tools]), - )[0] - .content, - ) - - def _get_react_property_filters_prompt(self) -> str: - return cast( - str, - ChatPromptTemplate.from_template(REACT_PROPERTY_FILTERS_PROMPT, template_format="mustache") - .format_messages(groups=self._team_group_types)[0] - .content, - ) - - @cached_property - def _events_prompt(self) -> str: - response = TeamTaxonomyQueryRunner(TeamTaxonomyQuery(), self._team).run( - ExecutionMode.RECENT_CACHE_CALCULATE_ASYNC_IF_STALE_AND_BLOCKING_ON_MISS - ) - - if not isinstance(response, CachedTeamTaxonomyQueryResponse): - raise ValueError("Failed to generate events prompt.") - - events: list[str] = [ - # Add "All Events" to the mapping - "All Events", - ] - for item in response.results: - if len(response.results) > 25 and item.count <= 3: - continue - events.append(item.event) - - root = ET.Element("defined_events") - for event_name in events: - event_tag = ET.SubElement(root, "event") - name_tag = ET.SubElement(event_tag, "name") - name_tag.text = event_name - - if event_core_definition := CORE_FILTER_DEFINITIONS_BY_GROUP["events"].get(event_name): - if event_core_definition.get("system") or event_core_definition.get("ignored_in_assistant"): - continue # Skip irrelevant events - if description := event_core_definition.get("description"): - desc_tag = ET.SubElement(event_tag, "description") - if label := event_core_definition.get("label"): - desc_tag.text = f"{label}. {description}" - else: - desc_tag.text = description - desc_tag.text = remove_line_breaks(desc_tag.text) - return ET.tostring(root, encoding="unicode") - - @cached_property - def _team_group_types(self) -> list[str]: - return list( - GroupTypeMapping.objects.filter(project_id=self._team.project_id) - .order_by("group_type_index") - .values_list("group_type", flat=True) - ) - - def _construct_messages(self, state: AssistantState) -> list[BaseMessage]: - """ - Reconstruct the conversation for the agent. On this step we only care about previously asked questions and generated plans. All other messages are filtered out. - """ - start_id = state.start_id - filtered_messages = filter_messages(slice_messages_to_conversation_start(state.messages, start_id)) - conversation = [] - - for idx, message in enumerate(filtered_messages): - if isinstance(message, HumanMessage): - # Add initial instructions. 
- if idx == 0: - conversation.append( - HumanMessagePromptTemplate.from_template(REACT_USER_PROMPT, template_format="mustache").format( - question=message.content - ) - ) - # Add follow-up instructions only for the human message that initiated a generation. - elif message.id == start_id: - conversation.append( - HumanMessagePromptTemplate.from_template( - REACT_FOLLOW_UP_PROMPT, - template_format="mustache", - ).format(feedback=message.content) - ) - # Everything else leave as is. - else: - conversation.append(LangchainHumanMessage(content=message.content)) - elif isinstance(message, VisualizationMessage): - conversation.append(LangchainAssistantMessage(content=message.plan or "")) - elif isinstance(message, AssistantMessage) and ( - # Filter out summarizer messages (which always follow viz), but leave clarification questions in - idx < 1 or not isinstance(filtered_messages[idx - 1], VisualizationMessage) - ): - conversation.append(LangchainAssistantMessage(content=message.content)) - - return conversation - - def _get_agent_scratchpad(self, scratchpad: list[tuple[AgentAction, str | None]]) -> str: - actions = [] - for action, observation in scratchpad: - if observation is None: - continue - actions.append((action, observation)) - return format_log_to_str(actions) - - -class TaxonomyAgentPlannerToolsNode(AssistantNode, ABC): - def _run_with_toolkit( - self, state: AssistantState, toolkit: TaxonomyAgentToolkit, config: Optional[RunnableConfig] = None - ) -> PartialAssistantState: - intermediate_steps = state.intermediate_steps or [] - action, observation = intermediate_steps[-1] - - try: - input = TaxonomyAgentTool.model_validate({"name": action.tool, "arguments": action.tool_input}).root - except ValidationError as e: - observation = str( - ChatPromptTemplate.from_template(REACT_PYDANTIC_VALIDATION_EXCEPTION_PROMPT, template_format="mustache") - .format_messages(exception=e.errors(include_url=False))[0] - .content - ) - return PartialAssistantState( - intermediate_steps=[*intermediate_steps[:-1], (action, str(observation))], - ) - - # The plan has been found. Move to the generation. - if input.name == "final_answer": - return PartialAssistantState( - plan=input.arguments, - intermediate_steps=[], - ) - if input.name == "ask_user_for_help": - # The agent has requested help, so we interrupt the graph. - if not state.resumed: - raise NodeInterrupt(input.arguments) - - # Feedback was provided. 
- last_message = state.messages[-1] - response = "" - if isinstance(last_message, HumanMessage): - response = last_message.content - - return PartialAssistantState( - resumed=False, - intermediate_steps=[*intermediate_steps[:-1], (action, response)], - ) - - output = "" - if input.name == "retrieve_event_properties": - output = toolkit.retrieve_event_properties(input.arguments) - elif input.name == "retrieve_event_property_values": - output = toolkit.retrieve_event_property_values(input.arguments.event_name, input.arguments.property_name) - elif input.name == "retrieve_entity_properties": - output = toolkit.retrieve_entity_properties(input.arguments) - elif input.name == "retrieve_entity_property_values": - output = toolkit.retrieve_entity_property_values(input.arguments.entity, input.arguments.property_name) - else: - output = toolkit.handle_incorrect_response(input.arguments) - - return PartialAssistantState( - intermediate_steps=[*intermediate_steps[:-1], (action, output)], - ) - - def router(self, state: AssistantState): - if state.plan: - return "plan_found" - return "continue" diff --git a/ee/hogai/taxonomy_agent/parsers.py b/ee/hogai/taxonomy_agent/parsers.py deleted file mode 100644 index 9233b57479..0000000000 --- a/ee/hogai/taxonomy_agent/parsers.py +++ /dev/null @@ -1,70 +0,0 @@ -import json -import re - -from langchain_core.agents import AgentAction -from langchain_core.messages import AIMessage as LangchainAIMessage - - -class ReActParserException(ValueError): - llm_output: str - - def __init__(self, llm_output: str): - super().__init__(llm_output) - self.llm_output = llm_output - - -class ReActParserMalformedJsonException(ReActParserException): - pass - - -class ReActParserMissingActionException(ReActParserException): - """ - The ReAct agent didn't output the "Action:" block. - """ - - pass - - -ACTION_LOG_PREFIX = "Action:" - - -def parse_react_agent_output(message: LangchainAIMessage) -> AgentAction: - """ - A ReAct agent must output in this format: - - Some thoughts... - Action: - ```json - {"action": "action_name", "action_input": "action_input"} - ``` - """ - text = str(message.content) - if ACTION_LOG_PREFIX not in text: - raise ReActParserMissingActionException(text) - found = re.compile(r"^.*?`{3}(?:json)?\n?(.*?)`{3}.*?$", re.DOTALL).search(text) - if not found: - # JSON not found. - raise ReActParserMalformedJsonException(text) - try: - action = found.group(1).strip() - response = json.loads(action) - is_complete = "action" in response and "action_input" in response - except Exception: - # JSON is malformed or has a wrong type. - raise ReActParserMalformedJsonException(text) - if not is_complete: - # JSON does not contain an action. 
- raise ReActParserMalformedJsonException(text) - return AgentAction(response["action"], response.get("action_input", {}), text) - - -class PydanticOutputParserException(ValueError): - llm_output: str - """Serialized LLM output.""" - validation_message: str - """Pydantic validation error message.""" - - def __init__(self, llm_output: str, validation_message: str): - super().__init__(llm_output) - self.llm_output = llm_output - self.validation_message = validation_message diff --git a/ee/hogai/taxonomy_agent/prompts.py b/ee/hogai/taxonomy_agent/prompts.py deleted file mode 100644 index 4779da4e7f..0000000000 --- a/ee/hogai/taxonomy_agent/prompts.py +++ /dev/null @@ -1,140 +0,0 @@ -REACT_FORMAT_PROMPT = """ -You have access to the following tools: -{{tools}} - -Use a JSON blob to specify a tool by providing an action key (tool name) and an action_input key (tool input). - -Valid "action" values: {{tool_names}} - -Provide only ONE action per $JSON_BLOB, as shown: - -``` -{ - "action": $TOOL_NAME, - "action_input": $INPUT -} -``` - -Follow this format: - -Question: input question to answer -Thought: consider previous and subsequent steps -Action: -``` -$JSON_BLOB -``` -Observation: action result -... (repeat Thought/Action/Observation N times) -Thought: I know what to respond -Action: -``` -{ - "action": "final_answer", - "action_input": "Final response to human" -} -``` -""".strip() - -REACT_PROPERTY_FILTERS_PROMPT = """ -<property_filters> -Use property filters to provide a narrowed results. Only include property filters when they are essential to directly answer the user’s question. Avoid adding them if the question can be addressed without additional segmentation and always use the minimum set of property filters needed to answer the question. Properties have one of the four types: String, Numeric, Boolean, and DateTime. - -IMPORTANT: Do not check if a property is set unless the user explicitly asks for it. - -When using a property filter, you must: -- **Prioritize properties directly related to the context or objective of the user's query.** Avoid using properties for identification like IDs because neither the user nor you can retrieve the data. Instead, prioritize filtering based on general properties like `paidCustomer` or `icp_score`. -- **Ensure that you find both the property group and name.** Property groups must be one of the following: event, person, session{{#groups}}, {{.}}{{/groups}}. -- After selecting a property, **validate that the property value accurately reflects the intended criteria**. -- **Find the suitable operator for type** (e.g., `contains`, `is set`). The operators are listed below. -- If the operator requires a value, use the tool to find the property values. Verify that you can answer the question with given property values. If you can't, try to find a different property or event. -- You set logical operators to combine multiple properties of a single series: AND or OR. - -Infer the property groups from the user's request. If your first guess doesn't yield any results, try to adjust the property group. You must make sure that the property name matches the lookup value, e.g. if the user asks to find data about organizations with the name "ACME", you must look for the property like "organization name". - -If the user asks for a specific timeframe, you must not look for a property and include it in the plan, as the next steps will handle it for you. 
- -Supported operators for the String or Numeric types are: -- equals -- doesn't equal -- contains -- doesn't contain -- matches regex -- doesn't match regex -- is set -- is not set - -Supported operators for the DateTime type are: -- equals -- doesn't equal -- greater than -- less than -- is set -- is not set - -Supported operators for the Boolean type are: -- equals -- doesn't equal -- is set -- is not set - -All operators take a single value except for `equals` and `doesn't equal which can take one or more values. -</property_filters> -""".strip() - -REACT_HUMAN_IN_THE_LOOP_PROMPT = """ -<human_in_the_loop> -Ask the user for clarification if: -- The user's question is ambiguous. -- You can't find matching events or properties. -- You're unable to build a plan that effectively answers the user's question. -</human_in_the_loop> -""".strip() - -REACT_FORMAT_REMINDER_PROMPT = """ -Begin! Reminder that you must ALWAYS respond with a valid JSON blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB``` then Observation. -""".strip() - -REACT_DEFINITIONS_PROMPT = """ -Here are the event names. -{{events}} -""" - -REACT_SCRATCHPAD_PROMPT = """ -Thought: {{agent_scratchpad}} -""" - -REACT_USER_PROMPT = """ -Answer the following question as best you can. -Question: What events, properties and/or property values should I use to answer this question "{{question}}"? -""" - -REACT_FOLLOW_UP_PROMPT = """ -Improve the previously generated plan based on the feedback: {{feedback}} -""" - -REACT_MISSING_ACTION_PROMPT = """ -Your previous answer didn't output the `Action:` block. You must always follow the format described in the system prompt. -""" - -REACT_MISSING_ACTION_CORRECTION_PROMPT = """ -{{output}} -Action: I didn't output the `Action:` block. -""" - -REACT_MALFORMED_JSON_PROMPT = """ -Your previous answer had a malformed JSON. You must return a correct JSON response containing the `action` and `action_input` fields. -""" - -REACT_PYDANTIC_VALIDATION_EXCEPTION_PROMPT = """ -The action input you previously provided didn't pass the validation and raised a Pydantic validation exception. - -<pydantic_exception> -{{exception}} -</pydantic_exception> - -You must fix the exception and try again. -""" - -CORE_MEMORY_INSTRUCTIONS = """ -You have access to the core memory in the <core_memory> tag, which stores information about the user's company and product. 
-""".strip() diff --git a/ee/hogai/taxonomy_agent/test/__init__.py b/ee/hogai/taxonomy_agent/test/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/taxonomy_agent/test/test_nodes.py b/ee/hogai/taxonomy_agent/test/test_nodes.py deleted file mode 100644 index dfb5561881..0000000000 --- a/ee/hogai/taxonomy_agent/test/test_nodes.py +++ /dev/null @@ -1,301 +0,0 @@ -from unittest.mock import patch - -from django.test import override_settings -from langchain_core.agents import AgentAction -from langchain_core.messages import AIMessage as LangchainAIMessage -from langchain_core.runnables import RunnableConfig, RunnableLambda - -from ee.hogai.taxonomy_agent.nodes import ( - ChatPromptTemplate, - TaxonomyAgentPlannerNode, - TaxonomyAgentPlannerToolsNode, -) -from ee.hogai.taxonomy_agent.toolkit import TaxonomyAgentToolkit, ToolkitTool -from ee.hogai.utils.types import AssistantState, PartialAssistantState -from posthog.models import GroupTypeMapping -from posthog.schema import ( - AssistantMessage, - AssistantTrendsQuery, - FailureMessage, - HumanMessage, - RouterMessage, - VisualizationMessage, -) -from posthog.test.base import APIBaseTest, ClickhouseTestMixin, _create_event, _create_person - - -class DummyToolkit(TaxonomyAgentToolkit): - def _get_tools(self) -> list[ToolkitTool]: - return self._default_tools - - -@override_settings(IN_UNIT_TESTING=True) -class TestTaxonomyAgentPlannerNode(ClickhouseTestMixin, APIBaseTest): - def setUp(self): - super().setUp() - self.schema = AssistantTrendsQuery(series=[]) - - def _get_node(self): - class Node(TaxonomyAgentPlannerNode): - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - prompt: ChatPromptTemplate = ChatPromptTemplate.from_messages([("user", "test")]) - toolkit = DummyToolkit(self._team) - return super()._run_with_prompt_and_toolkit(state, prompt, toolkit, config=config) - - return Node(self.team) - - def test_agent_reconstructs_conversation(self): - node = self._get_node() - history = node._construct_messages(AssistantState(messages=[HumanMessage(content="Text")])) - self.assertEqual(len(history), 1) - self.assertEqual(history[0].type, "human") - self.assertIn("Text", history[0].content) - self.assertNotIn(f"{{question}}", history[0].content) - - history = node._construct_messages( - AssistantState( - messages=[ - HumanMessage(content="Text", id="0"), - VisualizationMessage(answer=self.schema, plan="randomplan", id="1", initiator="0"), - ], - start_id="1", - ) - ) - self.assertEqual(len(history), 2) - self.assertEqual(history[0].type, "human") - self.assertIn("Text", history[0].content) - self.assertNotIn("{{question}}", history[0].content) - self.assertEqual(history[1].type, "ai") - self.assertEqual(history[1].content, "randomplan") - - history = node._construct_messages( - AssistantState( - messages=[ - HumanMessage(content="Text", id="0"), - VisualizationMessage(answer=self.schema, plan="randomplan", id="1", initiator="0"), - HumanMessage(content="Text", id="2"), - ], - start_id="2", - ) - ) - self.assertEqual(len(history), 3) - self.assertEqual(history[0].type, "human") - self.assertIn("Text", history[0].content) - self.assertNotIn("{{question}}", history[0].content) - self.assertEqual(history[1].type, "ai") - self.assertEqual(history[1].content, "randomplan") - self.assertEqual(history[2].type, "human") - self.assertIn("Text", history[2].content) - self.assertNotIn("{{question}}", history[2].content) - - def 
test_agent_reconstructs_conversation_and_omits_unknown_messages(self): - node = self._get_node() - history = node._construct_messages( - AssistantState( - messages=[ - HumanMessage(content="Text", id="0"), - RouterMessage(content="trends", id="1"), - AssistantMessage(content="test", id="2"), - ], - start_id="0", - ) - ) - self.assertEqual(len(history), 1) - self.assertEqual(history[0].type, "human") - self.assertIn("Text", history[0].content) - self.assertNotIn("{{question}}", history[0].content) - - def test_agent_reconstructs_conversation_with_failures(self): - node = self._get_node() - history = node._construct_messages( - AssistantState( - messages=[ - HumanMessage(content="Text"), - FailureMessage(content="Error"), - HumanMessage(content="Text"), - ], - ) - ) - self.assertEqual(len(history), 1) - self.assertEqual(history[0].type, "human") - self.assertIn("Text", history[0].content) - self.assertNotIn("{{question}}", history[0].content) - - def test_agent_reconstructs_typical_conversation(self): - node = self._get_node() - history = node._construct_messages( - AssistantState( - messages=[ - HumanMessage(content="Question 1", id="0"), - RouterMessage(content="trends", id="1"), - VisualizationMessage(answer=AssistantTrendsQuery(series=[]), plan="Plan 1", id="2", initiator="0"), - AssistantMessage(content="Summary 1", id="3"), - HumanMessage(content="Question 2", id="4"), - RouterMessage(content="funnel", id="5"), - AssistantMessage(content="Loop 1", id="6"), - HumanMessage(content="Loop Answer 1", id="7"), - VisualizationMessage(answer=AssistantTrendsQuery(series=[]), plan="Plan 2", id="8", initiator="4"), - AssistantMessage(content="Summary 2", id="9"), - HumanMessage(content="Question 3", id="10"), - RouterMessage(content="funnel", id="11"), - ], - start_id="10", - ) - ) - self.assertEqual(len(history), 7) - self.assertEqual(history[0].type, "human") - self.assertIn("Question 1", history[0].content) - self.assertEqual(history[1].type, "ai") - self.assertEqual(history[1].content, "Plan 1") - self.assertEqual(history[2].type, "human") - self.assertIn("Question 2", history[2].content) - self.assertEqual(history[3].type, "ai") - self.assertEqual(history[3].content, "Loop 1") - self.assertEqual(history[4].type, "human") - self.assertEqual(history[4].content, "Loop Answer 1") - self.assertEqual(history[5].type, "ai") - self.assertEqual(history[5].content, "Plan 2") - self.assertEqual(history[6].type, "human") - self.assertIn("Question 3", history[6].content) - - def test_agent_reconstructs_conversation_without_messages_after_parent(self): - node = self._get_node() - history = node._construct_messages( - AssistantState( - messages=[ - HumanMessage(content="Question 1", id="0"), - RouterMessage(content="trends", id="1"), - AssistantMessage(content="Loop 1", id="2"), - HumanMessage(content="Loop Answer 1", id="3"), - ], - start_id="0", - ) - ) - self.assertEqual(len(history), 1) - self.assertEqual(history[0].type, "human") - self.assertIn("Question 1", history[0].content) - - def test_agent_filters_out_low_count_events(self): - _create_person(distinct_ids=["test"], team=self.team) - for i in range(26): - _create_event(event=f"event{i}", distinct_id="test", team=self.team) - _create_event(event="distinctevent", distinct_id="test", team=self.team) - node = self._get_node() - self.assertEqual( - node._events_prompt, - "<defined_events><event><name>All Events</name><description>All events. 
This is a wildcard that matches all events.</description></event><event><name>distinctevent</name></event></defined_events>", - ) - - def test_agent_preserves_low_count_events_for_smaller_teams(self): - _create_person(distinct_ids=["test"], team=self.team) - _create_event(event="distinctevent", distinct_id="test", team=self.team) - node = self._get_node() - self.assertIn("distinctevent", node._events_prompt) - self.assertIn("all events", node._events_prompt) - - def test_agent_scratchpad(self): - node = self._get_node() - scratchpad = [ - (AgentAction(tool="test1", tool_input="input1", log="log1"), "test"), - (AgentAction(tool="test2", tool_input="input2", log="log2"), None), - (AgentAction(tool="test3", tool_input="input3", log="log3"), ""), - ] - prompt = node._get_agent_scratchpad(scratchpad) - self.assertIn("log1", prompt) - self.assertIn("log3", prompt) - - def test_agent_handles_output_without_action_block(self): - with patch( - "ee.hogai.taxonomy_agent.nodes.TaxonomyAgentPlannerNode._model", - return_value=RunnableLambda(lambda _: LangchainAIMessage(content="I don't want to output an action.")), - ): - node = self._get_node() - state_update = node.run(AssistantState(messages=[HumanMessage(content="Question")]), {}) - self.assertEqual(len(state_update.intermediate_steps), 1) - action, obs = state_update.intermediate_steps[0] - self.assertIsNone(obs) - self.assertIn("I don't want to output an action.", action.log) - self.assertIn("Action:", action.log) - self.assertIn("Action:", action.tool_input) - - def test_agent_handles_output_with_malformed_json(self): - with patch( - "ee.hogai.taxonomy_agent.nodes.TaxonomyAgentPlannerNode._model", - return_value=RunnableLambda(lambda _: LangchainAIMessage(content="Thought.\nAction: abc")), - ): - node = self._get_node() - state_update = node.run(AssistantState(messages=[HumanMessage(content="Question")]), {}) - self.assertEqual(len(state_update.intermediate_steps), 1) - action, obs = state_update.intermediate_steps[0] - self.assertIsNone(obs) - self.assertIn("Thought.\nAction: abc", action.log) - self.assertIn("action", action.tool_input) - self.assertIn("action_input", action.tool_input) - - def test_node_outputs_all_events_prompt(self): - node = self._get_node() - self.assertIn("All Events", node._events_prompt) - self.assertIn( - "<event><name>All Events</name><description>All events. 
This is a wildcard that matches all events.</description></event>", - node._events_prompt, - ) - - def test_format_prompt(self): - node = self._get_node() - self.assertNotIn("Human:", node._get_react_format_prompt(DummyToolkit(self.team))) - self.assertIn("retrieve_event_properties,", node._get_react_format_prompt(DummyToolkit(self.team))) - self.assertIn( - "retrieve_event_properties(event_name: str)", node._get_react_format_prompt(DummyToolkit(self.team)) - ) - - def test_property_filters_prompt(self): - GroupTypeMapping.objects.create(team=self.team, project=self.project, group_type="org", group_type_index=0) - GroupTypeMapping.objects.create(team=self.team, project=self.project, group_type="account", group_type_index=1) - node = self._get_node() - prompt = node._get_react_property_filters_prompt() - self.assertIn("org, account.", prompt) - - -@override_settings(IN_UNIT_TESTING=True) -class TestTaxonomyAgentPlannerToolsNode(ClickhouseTestMixin, APIBaseTest): - def _get_node(self): - class Node(TaxonomyAgentPlannerToolsNode): - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - toolkit = DummyToolkit(self._team) - return super()._run_with_toolkit(state, toolkit, config=config) - - return Node(self.team) - - def test_node_handles_action_name_validation_error(self): - state = AssistantState( - intermediate_steps=[(AgentAction(tool="does not exist", tool_input="input", log="log"), "test")], - messages=[], - ) - node = self._get_node() - state_update = node.run(state, {}) - self.assertEqual(len(state_update.intermediate_steps), 1) - action, observation = state_update.intermediate_steps[0] - self.assertIsNotNone(observation) - self.assertIn("<pydantic_exception>", observation) - - def test_node_handles_action_input_validation_error(self): - state = AssistantState( - intermediate_steps=[ - (AgentAction(tool="retrieve_entity_property_values", tool_input="input", log="log"), "test") - ], - messages=[], - ) - node = self._get_node() - state_update = node.run(state, {}) - self.assertEqual(len(state_update.intermediate_steps), 1) - action, observation = state_update.intermediate_steps[0] - self.assertIsNotNone(observation) - self.assertIn("<pydantic_exception>", observation) - - def test_router(self): - node = self._get_node() - self.assertEqual(node.router(AssistantState(messages=[HumanMessage(content="Question")])), "continue") - self.assertEqual(node.router(AssistantState(messages=[HumanMessage(content="Question")], plan="")), "continue") - self.assertEqual( - node.router(AssistantState(messages=[HumanMessage(content="Question")], plan="plan")), "plan_found" - ) diff --git a/ee/hogai/taxonomy_agent/test/test_parsers.py b/ee/hogai/taxonomy_agent/test/test_parsers.py deleted file mode 100644 index d8e5ed61e6..0000000000 --- a/ee/hogai/taxonomy_agent/test/test_parsers.py +++ /dev/null @@ -1,78 +0,0 @@ -from langchain_core.messages import AIMessage as LangchainAIMessage - -from ee.hogai.taxonomy_agent.parsers import ( - ReActParserMalformedJsonException, - ReActParserMissingActionException, - parse_react_agent_output, -) -from posthog.test.base import BaseTest - - -class TestTaxonomyAgentParsers(BaseTest): - def test_parse_react_agent_output(self): - res = parse_react_agent_output( - LangchainAIMessage( - content=""" - Some thoughts... 
- Action: - ```json - {"action": "action_name", "action_input": "action_input"} - ``` - """ - ) - ) - self.assertEqual(res.tool, "action_name") - self.assertEqual(res.tool_input, "action_input") - - res = parse_react_agent_output( - LangchainAIMessage( - content=""" - Some thoughts... - Action: - ``` - {"action": "tool", "action_input": {"key": "value"}} - ``` - """ - ) - ) - self.assertEqual(res.tool, "tool") - self.assertEqual(res.tool_input, {"key": "value"}) - - self.assertRaises( - ReActParserMissingActionException, parse_react_agent_output, LangchainAIMessage(content="Some thoughts...") - ) - self.assertRaises( - ReActParserMalformedJsonException, - parse_react_agent_output, - LangchainAIMessage(content="Some thoughts...\nAction: abc"), - ) - self.assertRaises( - ReActParserMalformedJsonException, - parse_react_agent_output, - LangchainAIMessage(content="Some thoughts...\nAction:"), - ) - self.assertRaises( - ReActParserMalformedJsonException, - parse_react_agent_output, - LangchainAIMessage(content="Some thoughts...\nAction: {}"), - ) - self.assertRaises( - ReActParserMalformedJsonException, - parse_react_agent_output, - LangchainAIMessage(content="Some thoughts...\nAction:\n```\n{}\n```"), - ) - self.assertRaises( - ReActParserMalformedJsonException, - parse_react_agent_output, - LangchainAIMessage(content="Some thoughts...\nAction:\n```\n{not a json}\n```"), - ) - self.assertRaises( - ReActParserMalformedJsonException, - parse_react_agent_output, - LangchainAIMessage(content='Some thoughts...\nAction:\n```\n{"action":"tool"}\n```'), - ) - self.assertRaises( - ReActParserMalformedJsonException, - parse_react_agent_output, - LangchainAIMessage(content='Some thoughts...\nAction:\n```\n{"action_input":"input"}\n```'), - ) diff --git a/ee/hogai/taxonomy_agent/test/test_toolkit.py b/ee/hogai/taxonomy_agent/test/test_toolkit.py deleted file mode 100644 index 32967d0916..0000000000 --- a/ee/hogai/taxonomy_agent/test/test_toolkit.py +++ /dev/null @@ -1,273 +0,0 @@ -from datetime import datetime - -from django.test import override_settings -from freezegun import freeze_time - -from ee.hogai.taxonomy_agent.toolkit import TaxonomyAgentToolkit, ToolkitTool -from posthog.models.group.util import create_group -from posthog.models.group_type_mapping import GroupTypeMapping -from posthog.models.property_definition import PropertyDefinition, PropertyType -from posthog.test.base import APIBaseTest, ClickhouseTestMixin, _create_event, _create_person - - -class DummyToolkit(TaxonomyAgentToolkit): - def _get_tools(self) -> list[ToolkitTool]: - return self._default_tools - - -@override_settings(IN_UNIT_TESTING=True) -class TestTaxonomyAgentToolkit(ClickhouseTestMixin, APIBaseTest): - def _create_taxonomy(self): - PropertyDefinition.objects.create( - team=self.team, type=PropertyDefinition.Type.EVENT, name="$browser", property_type=PropertyType.String - ) - PropertyDefinition.objects.create( - team=self.team, type=PropertyDefinition.Type.EVENT, name="id", property_type=PropertyType.Numeric - ) - PropertyDefinition.objects.create( - team=self.team, type=PropertyDefinition.Type.EVENT, name="bool", property_type=PropertyType.Boolean - ) - PropertyDefinition.objects.create( - team=self.team, type=PropertyDefinition.Type.EVENT, name="date", property_type=PropertyType.Datetime - ) - - _create_person( - distinct_ids=["person1"], - team=self.team, - properties={"email": "person1@example.com"}, - ) - _create_event( - event="event1", - distinct_id="person1", - properties={ - "$browser": "Chrome", - "date": 
datetime(2024, 1, 1).isoformat(), - }, - team=self.team, - ) - _create_event( - event="event1", - distinct_id="person1", - properties={ - "$browser": "Firefox", - "bool": True, - }, - team=self.team, - ) - - _create_person( - distinct_ids=["person2"], - properties={"email": "person2@example.com"}, - team=self.team, - ) - for i in range(10): - _create_event( - event="event1", - distinct_id=f"person2", - properties={"id": i}, - team=self.team, - ) - - def test_retrieve_entity_properties(self): - toolkit = DummyToolkit(self.team) - - PropertyDefinition.objects.create( - team=self.team, type=PropertyDefinition.Type.PERSON, name="test", property_type="String" - ) - self.assertEqual( - toolkit.retrieve_entity_properties("person"), - "<properties><String><prop><name>test</name></prop></String></properties>", - ) - - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type_index=0, group_type="group" - ) - PropertyDefinition.objects.create( - team=self.team, type=PropertyDefinition.Type.GROUP, group_type_index=0, name="test", property_type="Numeric" - ) - self.assertEqual( - toolkit.retrieve_entity_properties("group"), - "<properties><Numeric><prop><name>test</name></prop></Numeric></properties>", - ) - - self.assertNotEqual( - toolkit.retrieve_entity_properties("session"), - "<properties />", - ) - self.assertIn( - "$session_duration", - toolkit.retrieve_entity_properties("session"), - ) - - def test_retrieve_entity_properties_returns_descriptive_feedback_without_properties(self): - toolkit = DummyToolkit(self.team) - self.assertEqual( - toolkit.retrieve_entity_properties("person"), - "Properties do not exist in the taxonomy for the entity person.", - ) - - def test_retrieve_entity_property_values(self): - toolkit = DummyToolkit(self.team) - self.assertEqual( - toolkit.retrieve_entity_property_values("session", "$session_duration"), - "30, 146, 2 and many more distinct values.", - ) - self.assertEqual( - toolkit.retrieve_entity_property_values("session", "nonsense"), - "The property nonsense does not exist in the taxonomy.", - ) - - PropertyDefinition.objects.create( - team=self.team, type=PropertyDefinition.Type.PERSON, name="email", property_type=PropertyType.String - ) - PropertyDefinition.objects.create( - team=self.team, type=PropertyDefinition.Type.PERSON, name="id", property_type=PropertyType.Numeric - ) - - for i in range(5): - id = f"person{i}" - with freeze_time(f"2024-01-01T{i}:00:00Z"): - _create_person( - distinct_ids=[id], - properties={"email": f"{id}@example.com", "id": i}, - team=self.team, - ) - with freeze_time(f"2024-01-02T00:00:00Z"): - _create_person( - distinct_ids=["person5"], - properties={"email": "person5@example.com", "id": 5}, - team=self.team, - ) - - self.assertEqual( - toolkit.retrieve_entity_property_values("person", "email"), - '"person5@example.com", "person4@example.com", "person3@example.com", "person2@example.com", "person1@example.com" and 1 more distinct value.', - ) - self.assertEqual( - toolkit.retrieve_entity_property_values("person", "id"), - "5, 4, 3, 2, 1 and 1 more distinct value.", - ) - - toolkit = DummyToolkit(self.team) - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type_index=0, group_type="proj" - ) - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type_index=1, group_type="org" - ) - PropertyDefinition.objects.create( - team=self.team, type=PropertyDefinition.Type.GROUP, group_type_index=0, name="test", 
property_type="Numeric" - ) - PropertyDefinition.objects.create( - team=self.team, type=PropertyDefinition.Type.GROUP, group_type_index=1, name="test", property_type="String" - ) - - for i in range(7): - id = f"group{i}" - with freeze_time(f"2024-01-01T{i}:00:00Z"): - create_group( - group_type_index=0, - group_key=id, - properties={"test": i}, - team_id=self.team.pk, - ) - with freeze_time(f"2024-01-02T00:00:00Z"): - create_group( - group_type_index=1, - group_key="org", - properties={"test": "7"}, - team_id=self.team.pk, - ) - - self.assertEqual( - toolkit.retrieve_entity_property_values("proj", "test"), - "6, 5, 4, 3, 2 and 2 more distinct values.", - ) - self.assertEqual(toolkit.retrieve_entity_property_values("org", "test"), '"7"') - - def test_group_names(self): - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type_index=0, group_type="proj" - ) - GroupTypeMapping.objects.create( - team=self.team, project_id=self.team.project_id, group_type_index=1, group_type="org" - ) - toolkit = DummyToolkit(self.team) - self.assertEqual(toolkit._entity_names, ["person", "session", "proj", "org"]) - - def test_retrieve_event_properties_returns_descriptive_feedback_without_properties(self): - toolkit = DummyToolkit(self.team) - self.assertEqual( - toolkit.retrieve_event_properties("pageview"), - "Properties do not exist in the taxonomy for the event pageview.", - ) - - def test_empty_events(self): - toolkit = DummyToolkit(self.team) - self.assertEqual( - toolkit.retrieve_event_properties("test"), "Properties do not exist in the taxonomy for the event test." - ) - - _create_person( - distinct_ids=["person1"], - team=self.team, - properties={}, - ) - _create_event( - event="event1", - distinct_id="person1", - properties={}, - team=self.team, - ) - - toolkit = DummyToolkit(self.team) - self.assertEqual( - toolkit.retrieve_event_properties("event1"), - "Properties do not exist in the taxonomy for the event event1.", - ) - - def test_retrieve_event_properties(self): - self._create_taxonomy() - toolkit = DummyToolkit(self.team) - prompt = toolkit.retrieve_event_properties("event1") - - self.assertIn( - "<Numeric><prop><name>id</name></prop></Numeric>", - prompt, - ) - self.assertIn( - "<String><prop><name>$browser</name><description>Name of the browser the user has used.</description></prop></String>", - prompt, - ) - self.assertIn( - "<DateTime><prop><name>date</name></prop></DateTime>", - prompt, - ) - self.assertIn( - "<Boolean><prop><name>bool</name></prop></Boolean>", - prompt, - ) - - def test_retrieve_event_property_values(self): - self._create_taxonomy() - toolkit = DummyToolkit(self.team) - - self.assertIn('"Chrome"', toolkit.retrieve_event_property_values("event1", "$browser")) - self.assertIn('"Firefox"', toolkit.retrieve_event_property_values("event1", "$browser")) - self.assertEqual(toolkit.retrieve_event_property_values("event1", "bool"), "true") - self.assertEqual( - toolkit.retrieve_event_property_values("event1", "id"), - "9, 8, 7, 6, 5 and 5 more distinct values.", - ) - self.assertEqual( - toolkit.retrieve_event_property_values("event1", "date"), f'"{datetime(2024, 1, 1).isoformat()}"' - ) - - def test_enrich_props_with_descriptions(self): - toolkit = DummyToolkit(self.team) - res = toolkit._enrich_props_with_descriptions("event", [("$geoip_city_name", "String")]) - self.assertEqual(len(res), 1) - prop, type, description = res[0] - self.assertEqual(prop, "$geoip_city_name") - self.assertEqual(type, "String") - self.assertIsNotNone(description) 
diff --git a/ee/hogai/taxonomy_agent/toolkit.py b/ee/hogai/taxonomy_agent/toolkit.py deleted file mode 100644 index 00b91de772..0000000000 --- a/ee/hogai/taxonomy_agent/toolkit.py +++ /dev/null @@ -1,437 +0,0 @@ -import xml.etree.ElementTree as ET -from abc import ABC, abstractmethod -from collections.abc import Iterable -from functools import cached_property -from textwrap import dedent -from typing import Literal, Optional, TypedDict, Union, cast - -from pydantic import BaseModel, Field, RootModel - -from posthog.taxonomy.taxonomy import CORE_FILTER_DEFINITIONS_BY_GROUP -from posthog.hogql.database.schema.channel_type import DEFAULT_CHANNEL_TYPES -from posthog.hogql_queries.ai.actors_property_taxonomy_query_runner import ActorsPropertyTaxonomyQueryRunner -from posthog.hogql_queries.ai.event_taxonomy_query_runner import EventTaxonomyQueryRunner -from posthog.hogql_queries.query_runner import ExecutionMode -from posthog.models.group_type_mapping import GroupTypeMapping -from posthog.models.property_definition import PropertyDefinition, PropertyType -from posthog.models.team.team import Team -from posthog.schema import ( - ActorsPropertyTaxonomyQuery, - CachedActorsPropertyTaxonomyQueryResponse, - CachedEventTaxonomyQueryResponse, - EventTaxonomyQuery, -) - - -class ToolkitTool(TypedDict): - name: str - signature: str - description: str - - -class RetrieveEntityPropertiesValuesArgs(BaseModel): - entity: str - property_name: str - - -class RetrieveEntityPropertiesValuesTool(BaseModel): - name: Literal["retrieve_entity_property_values"] - arguments: RetrieveEntityPropertiesValuesArgs - - -class RetrieveEventPropertiesValuesArgs(BaseModel): - event_name: str - property_name: str - - -class RetrieveEventPropertiesValuesTool(BaseModel): - name: Literal["retrieve_event_property_values"] - arguments: RetrieveEventPropertiesValuesArgs - - -class SingleArgumentTaxonomyAgentTool(BaseModel): - name: Literal[ - "retrieve_entity_properties", - "retrieve_event_properties", - "final_answer", - "handle_incorrect_response", - "ask_user_for_help", - ] - arguments: str - - -class TaxonomyAgentTool( - RootModel[ - Union[SingleArgumentTaxonomyAgentTool, RetrieveEntityPropertiesValuesTool, RetrieveEventPropertiesValuesTool] - ] -): - root: Union[ - SingleArgumentTaxonomyAgentTool, RetrieveEntityPropertiesValuesTool, RetrieveEventPropertiesValuesTool - ] = Field(..., discriminator="name") - - -class TaxonomyAgentToolkit(ABC): - _team: Team - - def __init__(self, team: Team): - self._team = team - - @cached_property - def tools(self) -> list[ToolkitTool]: - return [ - { - "name": tool["name"], - "signature": tool["signature"], - "description": dedent(tool["description"]), - } - for tool in self._get_tools() - ] - - @abstractmethod - def _get_tools(self) -> list[ToolkitTool]: - raise NotImplementedError - - @property - def _default_tools(self) -> list[ToolkitTool]: - stringified_entities = ", ".join([f"'{entity}'" for entity in self._entity_names]) - return [ - { - "name": "retrieve_event_properties", - "signature": "(event_name: str)", - "description": """ - Use this tool to retrieve the property names of an event that the user has in their taxonomy. You will receive a list of properties containing their name, value type, and description, or a message that properties have not been found. - - - **Try other events** if the tool doesn't return any properties. 
- - **Prioritize properties that are directly related to the context or objective of the user's query.** - - **Avoid using ambiguous properties** unless their relevance is explicitly confirmed. - - Args: - event_name: The name of the event that you want to retrieve properties for. - """, - }, - { - "name": "retrieve_event_property_values", - "signature": "(event_name: str, property_name: str)", - "description": """ - Use this tool to retrieve the property values for an event that the user has in their taxonomy. Adjust filters to these values. You will receive a list of property values or a message that property values have not been found. Some properties can have many values, so the output will be truncated. Use your judgment to find a proper value. - - Args: - event_name: The name of the event that you want to retrieve values for. - property_name: The name of the property that you want to retrieve values for. - """, - }, - { - "name": f"retrieve_entity_properties", - "signature": f"(entity: Literal[{stringified_entities}])", - "description": """ - Use this tool to retrieve property names for a property group (entity) that the user has in their taxonomy. You will receive a list of properties containing their name, value type, and description, or a message that properties have not been found. - - - **Infer the property groups from the user's request.** - - **Try other entities** if the tool doesn't return any properties. - - **Prioritize properties that are directly related to the context or objective of the user's query.** - - **Avoid using ambiguous properties** unless their relevance is explicitly confirmed. - - Args: - entity: The type of the entity that you want to retrieve properties for. - """, - }, - { - "name": "retrieve_entity_property_values", - "signature": f"(entity: Literal[{stringified_entities}], property_name: str)", - "description": """ - Use this tool to retrieve property values for a property name that the user has in their taxonomy. Adjust filters to these values. You will receive a list of property values or a message that property values have not been found. Some properties can have many values, so the output will be truncated. Use your judgment to find a proper value. - - Args: - entity: The type of the entity that you want to retrieve properties for. - property_name: The name of the property that you want to retrieve values for. - """, - }, - { - "name": "ask_user_for_help", - "signature": "(question: str)", - "description": """ - Use this tool to ask a question to the user. Your question must be concise and clear. - - Args: - question: The question you want to ask. - """, - }, - ] - - def render_text_description(self) -> str: - """ - Render the tool name and description in plain text. - - Returns: - The rendered text. - - Output will be in the format of: - - .. code-block:: markdown - - search: This tool is used for search - calculator: This tool is used for math - """ - descriptions = [] - for tool in self.tools: - description = f"{tool['name']}{tool['signature']} - {tool['description']}" - descriptions.append(description) - return "\n".join(descriptions) - - @property - def _groups(self): - return GroupTypeMapping.objects.filter(project_id=self._team.project_id).order_by("group_type_index") - - @cached_property - def _entity_names(self) -> list[str]: - """ - The schemas use `group_type_index` for groups complicating things for the agent. Instead, we use groups' names, - so the generation step will handle their indexes. 
Tools would need to support multiple arguments, or we would need - to create various tools for different group types. Since we don't use function calling here, we want to limit the - number of tools because non-function calling models can't handle many tools. - """ - entities = [ - "person", - "session", - *[group.group_type for group in self._groups], - ] - return entities - - def _generate_properties_xml(self, children: list[tuple[str, str | None, str | None]]): - root = ET.Element("properties") - property_type_to_tag = {} - - for name, property_type, description in children: - # Do not include properties that are ambiguous. - if property_type is None: - continue - if property_type not in property_type_to_tag: - property_type_to_tag[property_type] = ET.SubElement(root, property_type) - - type_tag = property_type_to_tag[property_type] - prop = ET.SubElement(type_tag, "prop") - ET.SubElement(prop, "name").text = name - if description: - ET.SubElement(prop, "description").text = description - - return ET.tostring(root, encoding="unicode") - - def _enrich_props_with_descriptions(self, entity: str, props: Iterable[tuple[str, str | None]]): - enriched_props = [] - mapping = { - "session": CORE_FILTER_DEFINITIONS_BY_GROUP["session_properties"], - "person": CORE_FILTER_DEFINITIONS_BY_GROUP["person_properties"], - "event": CORE_FILTER_DEFINITIONS_BY_GROUP["event_properties"], - } - for prop_name, prop_type in props: - description = None - if entity_definition := mapping.get(entity, {}).get(prop_name): - if entity_definition.get("system") or entity_definition.get("ignored_in_assistant"): - continue - description = entity_definition.get("description") - enriched_props.append((prop_name, prop_type, description)) - return enriched_props - - def retrieve_entity_properties(self, entity: str) -> str: - """ - Retrieve properties for an entitiy like person, session, or one of the groups. - """ - if entity not in ("person", "session", *[group.group_type for group in self._groups]): - return f"Entity {entity} does not exist in the taxonomy." - - if entity == "person": - qs = PropertyDefinition.objects.filter(team=self._team, type=PropertyDefinition.Type.PERSON).values_list( - "name", "property_type" - ) - props = self._enrich_props_with_descriptions("person", qs) - elif entity == "session": - # Session properties are not in the DB. - props = self._enrich_props_with_descriptions( - "session", - [ - (prop_name, prop["type"]) - for prop_name, prop in CORE_FILTER_DEFINITIONS_BY_GROUP["session_properties"].items() - if prop.get("type") is not None - ], - ) - else: - group_type_index = next( - (group.group_type_index for group in self._groups if group.group_type == entity), None - ) - if group_type_index is None: - return f"Group {entity} does not exist in the taxonomy." - qs = PropertyDefinition.objects.filter( - team=self._team, type=PropertyDefinition.Type.GROUP, group_type_index=group_type_index - ).values_list("name", "property_type") - props = self._enrich_props_with_descriptions(entity, qs) - - if not props: - return f"Properties do not exist in the taxonomy for the entity {entity}." - - return self._generate_properties_xml(props) - - def retrieve_event_properties(self, event_name: str) -> str: - """ - Retrieve properties for an event. 
- """ - runner = EventTaxonomyQueryRunner(EventTaxonomyQuery(event=event_name), self._team) - response = runner.run(ExecutionMode.RECENT_CACHE_CALCULATE_ASYNC_IF_STALE_AND_BLOCKING_ON_MISS) - - if not isinstance(response, CachedEventTaxonomyQueryResponse): - return "Properties have not been found." - - if not response.results: - return f"Properties do not exist in the taxonomy for the event {event_name}." - - # Intersect properties with their types. - qs = PropertyDefinition.objects.filter( - team=self._team, type=PropertyDefinition.Type.EVENT, name__in=[item.property for item in response.results] - ) - property_to_type = {property_definition.name: property_definition.property_type for property_definition in qs} - props = [ - (item.property, property_to_type.get(item.property)) - for item in response.results - # Exclude properties that exist in the taxonomy, but don't have a type. - if item.property in property_to_type - ] - - if not props: - return f"Properties do not exist in the taxonomy for the event {event_name}." - - return self._generate_properties_xml(self._enrich_props_with_descriptions("event", props)) - - def _format_property_values( - self, sample_values: list, sample_count: Optional[int] = 0, format_as_string: bool = False - ) -> str: - if len(sample_values) == 0 or sample_count == 0: - return f"The property does not have any values in the taxonomy." - - # Add quotes to the String type, so the LLM can easily infer a type. - # Strings like "true" or "10" are interpreted as booleans or numbers without quotes, so the schema generation fails. - # Remove the floating point the value is an integer. - formatted_sample_values: list[str] = [] - for value in sample_values: - if format_as_string: - formatted_sample_values.append(f'"{value}"') - elif isinstance(value, float) and value.is_integer(): - formatted_sample_values.append(str(int(value))) - else: - formatted_sample_values.append(str(value)) - prop_values = ", ".join(formatted_sample_values) - - # If there wasn't an exact match with the user's search, we provide a hint that LLM can use an arbitrary value. - if sample_count is None: - return f"{prop_values} and many more distinct values." - elif sample_count > len(sample_values): - diff = sample_count - len(sample_values) - return f"{prop_values} and {diff} more distinct value{'' if diff == 1 else 's'}." - - return prop_values - - def retrieve_event_property_values(self, event_name: str, property_name: str) -> str: - try: - property_definition = PropertyDefinition.objects.get( - team=self._team, name=property_name, type=PropertyDefinition.Type.EVENT - ) - except PropertyDefinition.DoesNotExist: - return f"The property {property_name} does not exist in the taxonomy." - - runner = EventTaxonomyQueryRunner(EventTaxonomyQuery(event=event_name), self._team) - response = runner.run(ExecutionMode.RECENT_CACHE_CALCULATE_ASYNC_IF_STALE_AND_BLOCKING_ON_MISS) - - if not isinstance(response, CachedEventTaxonomyQueryResponse): - return f"The event {event_name} does not exist in the taxonomy." - - if not response.results: - return f"Property values for {property_name} do not exist in the taxonomy for the event {event_name}." - - prop = next((item for item in response.results if item.property == property_name), None) - if not prop: - return f"The property {property_name} does not exist in the taxonomy for the event {event_name}." 
- - return self._format_property_values( - prop.sample_values, - prop.sample_count, - format_as_string=property_definition.property_type in (PropertyType.String, PropertyType.Datetime), - ) - - def _retrieve_session_properties(self, property_name: str) -> str: - """ - Sessions properties example property values are hardcoded. - """ - if property_name not in CORE_FILTER_DEFINITIONS_BY_GROUP["session_properties"]: - return f"The property {property_name} does not exist in the taxonomy." - - sample_values: list[str | int | float] - if property_name == "$channel_type": - sample_values = cast(list[str | int | float], DEFAULT_CHANNEL_TYPES.copy()) - sample_count = len(sample_values) - is_str = True - elif ( - property_name in CORE_FILTER_DEFINITIONS_BY_GROUP["session_properties"] - and "examples" in CORE_FILTER_DEFINITIONS_BY_GROUP["session_properties"][property_name] - ): - sample_values = CORE_FILTER_DEFINITIONS_BY_GROUP["session_properties"][property_name]["examples"] - sample_count = None - is_str = ( - CORE_FILTER_DEFINITIONS_BY_GROUP["session_properties"][property_name]["type"] == PropertyType.String - ) - else: - return f"Property values for {property_name} do not exist in the taxonomy for the session entity." - - return self._format_property_values(sample_values, sample_count, format_as_string=is_str) - - def retrieve_entity_property_values(self, entity: str, property_name: str) -> str: - if entity not in self._entity_names: - return f"The entity {entity} does not exist in the taxonomy. You must use one of the following: {', '.join(self._entity_names)}." - - if entity == "session": - return self._retrieve_session_properties(property_name) - - if entity == "person": - query = ActorsPropertyTaxonomyQuery(property=property_name) - else: - group_index = next((group.group_type_index for group in self._groups if group.group_type == entity), None) - if group_index is None: - return f"The entity {entity} does not exist in the taxonomy." - query = ActorsPropertyTaxonomyQuery(group_type_index=group_index, property=property_name) - - try: - if query.group_type_index is not None: - prop_type = PropertyDefinition.Type.GROUP - group_type_index = query.group_type_index - else: - prop_type = PropertyDefinition.Type.PERSON - group_type_index = None - - property_definition = PropertyDefinition.objects.get( - team=self._team, - name=property_name, - type=prop_type, - group_type_index=group_type_index, - ) - except PropertyDefinition.DoesNotExist: - return f"The property {property_name} does not exist in the taxonomy for the entity {entity}." - - response = ActorsPropertyTaxonomyQueryRunner(query, self._team).run( - ExecutionMode.RECENT_CACHE_CALCULATE_ASYNC_IF_STALE_AND_BLOCKING_ON_MISS - ) - - if not isinstance(response, CachedActorsPropertyTaxonomyQueryResponse): - return f"The entity {entity} does not exist in the taxonomy." - - if not response.results: - return f"Property values for {property_name} do not exist in the taxonomy for the entity {entity}." - - return self._format_property_values( - response.results.sample_values, - response.results.sample_count, - format_as_string=property_definition.property_type in (PropertyType.String, PropertyType.Datetime), - ) - - def handle_incorrect_response(self, response: str) -> str: - """ - No-op tool. Take a parsing error and return a response that the LLM can use to correct itself. - Used to control a number of retries. 
- """ - return response diff --git a/ee/hogai/test/__init__.py b/ee/hogai/test/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/test/test_assistant.py b/ee/hogai/test/test_assistant.py deleted file mode 100644 index 525c4ff134..0000000000 --- a/ee/hogai/test/test_assistant.py +++ /dev/null @@ -1,692 +0,0 @@ -import json -from typing import Any, Optional, cast -from unittest.mock import patch - -import pytest -from langchain_core import messages -from langchain_core.agents import AgentAction -from langchain_core.runnables import RunnableConfig, RunnableLambda -from langgraph.graph.state import CompiledStateGraph -from langgraph.types import StateSnapshot -from pydantic import BaseModel - -from ee.hogai.funnels.nodes import FunnelsSchemaGeneratorOutput -from ee.hogai.memory import prompts as memory_prompts -from ee.hogai.router.nodes import RouterOutput -from ee.hogai.trends.nodes import TrendsSchemaGeneratorOutput -from ee.models.assistant import Conversation, CoreMemory -from posthog.schema import ( - AssistantFunnelsEventsNode, - AssistantFunnelsQuery, - AssistantMessage, - AssistantTrendsQuery, - FailureMessage, - HumanMessage, - ReasoningMessage, - RouterMessage, - VisualizationMessage, -) -from posthog.test.base import ClickhouseTestMixin, NonAtomicBaseTest, _create_event, _create_person - -from ..assistant import Assistant -from ..graph import AssistantGraph, AssistantNodeName - - -class TestAssistant(ClickhouseTestMixin, NonAtomicBaseTest): - CLASS_DATA_LEVEL_SETUP = False - - def setUp(self): - super().setUp() - self.conversation = Conversation.objects.create(team=self.team, user=self.user) - self.core_memory = CoreMemory.objects.create( - team=self.team, - text="Initial memory.", - initial_text="Initial memory.", - scraping_status=CoreMemory.ScrapingStatus.COMPLETED, - ) - - def _set_up_onboarding_tests(self): - self.core_memory.delete() - _create_person( - distinct_ids=["person1"], - team=self.team, - ) - _create_event( - event="$pageview", - distinct_id="person1", - team=self.team, - properties={"$host": "us.posthog.com"}, - ) - - def _parse_stringified_message(self, message: str) -> tuple[str, Any]: - event_line, data_line, *_ = cast(str, message).split("\n") - return (event_line.removeprefix("event: "), json.loads(data_line.removeprefix("data: "))) - - def _run_assistant_graph( - self, - test_graph: Optional[CompiledStateGraph] = None, - message: Optional[str] = "Hello", - conversation: Optional[Conversation] = None, - is_new_conversation: bool = False, - ) -> list[tuple[str, Any]]: - # Create assistant instance with our test graph - assistant = Assistant( - self.team, - conversation or self.conversation, - HumanMessage(content=message), - self.user, - is_new_conversation=is_new_conversation, - ) - if test_graph: - assistant._graph = test_graph - # Capture and parse output of assistant.stream() - output: list[tuple[str, Any]] = [] - for message in assistant.stream(): - output.append(self._parse_stringified_message(message)) - return output - - def assertConversationEqual(self, output: list[tuple[str, Any]], expected_output: list[tuple[str, Any]]): - for i, ((output_msg_type, output_msg), (expected_msg_type, expected_msg)) in enumerate( - zip(output, expected_output) - ): - self.assertEqual(output_msg_type, expected_msg_type, f"Message type mismatch at index {i}") - msg_dict = ( - expected_msg.model_dump(exclude_none=True) if isinstance(expected_msg, BaseModel) else expected_msg - ) - self.assertDictContainsSubset(msg_dict, output_msg, 
f"Message content mismatch at index {i}") - - @patch( - "ee.hogai.trends.nodes.TrendsPlannerNode.run", - return_value={"intermediate_steps": [(AgentAction(tool="final_answer", tool_input="Plan", log=""), None)]}, - ) - @patch( - "ee.hogai.summarizer.nodes.SummarizerNode.run", return_value={"messages": [AssistantMessage(content="Foobar")]} - ) - def test_reasoning_messages_added(self, _mock_summarizer_run, _mock_funnel_planner_run): - output = self._run_assistant_graph( - AssistantGraph(self.team) - .add_edge(AssistantNodeName.START, AssistantNodeName.TRENDS_PLANNER) - .add_trends_planner(AssistantNodeName.SUMMARIZER) - .add_summarizer(AssistantNodeName.END) - .compile(), - conversation=self.conversation, - ) - - # Assert that ReasoningMessages are added - expected_output = [ - ( - "message", - HumanMessage(content="Hello").model_dump(exclude_none=True), - ), - ( - "message", - { - "type": "ai/reasoning", - "content": "Picking relevant events and properties", # For TrendsPlannerNode - "substeps": [], - }, - ), - ( - "message", - { - "type": "ai/reasoning", - "content": "Picking relevant events and properties", # For TrendsPlannerToolsNode - "substeps": [], - }, - ), - ( - "message", - { - "type": "ai", - "content": "Foobar", # Summarizer merits no ReasoningMessage, we output its results outright - }, - ), - ] - self.assertConversationEqual(output, expected_output) - - @patch( - "ee.hogai.trends.nodes.TrendsPlannerNode.run", - return_value={ - "intermediate_steps": [ - # Compare with toolkit.py to see supported AgentAction shapes. The list below is supposed to include ALL - (AgentAction(tool="retrieve_entity_properties", tool_input="session", log=""), None), - (AgentAction(tool="retrieve_event_properties", tool_input="$pageview", log=""), None), - ( - AgentAction( - tool="retrieve_event_property_values", - tool_input={"event_name": "purchase", "property_name": "currency"}, - log="", - ), - None, - ), - ( - AgentAction( - tool="retrieve_entity_property_values", - tool_input={"entity": "person", "property_name": "country_of_birth"}, - log="", - ), - None, - ), - (AgentAction(tool="handle_incorrect_response", tool_input="", log=""), None), - (AgentAction(tool="final_answer", tool_input="Plan", log=""), None), - ] - }, - ) - def test_reasoning_messages_with_substeps_added(self, _mock_funnel_planner_run): - output = self._run_assistant_graph( - AssistantGraph(self.team) - .add_edge(AssistantNodeName.START, AssistantNodeName.TRENDS_PLANNER) - .add_trends_planner(AssistantNodeName.END) - .compile(), - conversation=self.conversation, - ) - - # Assert that ReasoningMessages are added - expected_output = [ - ( - "message", - HumanMessage(content="Hello").model_dump(exclude_none=True), - ), - ( - "message", - { - "type": "ai/reasoning", - "content": "Picking relevant events and properties", # For TrendsPlannerNode - "substeps": [], - }, - ), - ( - "message", - { - "type": "ai/reasoning", - "content": "Picking relevant events and properties", # For TrendsPlannerToolsNode - "substeps": [ - "Exploring session properties", - "Exploring `$pageview` event's properties", - "Analyzing `currency` event's property `purchase`", - "Analyzing person property `country_of_birth`", - ], - }, - ), - ] - self.assertConversationEqual(output, expected_output) - - def _test_human_in_the_loop(self, graph: CompiledStateGraph): - with patch("ee.hogai.taxonomy_agent.nodes.TaxonomyAgentPlannerNode._model") as mock: - config: RunnableConfig = { - "configurable": { - "thread_id": self.conversation.id, - } - } - - # Interrupt the 
graph - message = """ - Thought: Let's ask for help. - Action: - ``` - { - "action": "ask_user_for_help", - "action_input": "Need help with this query" - } - ``` - """ - mock.return_value = RunnableLambda(lambda _: messages.AIMessage(content=message)) - output = self._run_assistant_graph(graph, conversation=self.conversation) - expected_output = [ - ("message", HumanMessage(content="Hello")), - ("message", ReasoningMessage(content="Picking relevant events and properties", substeps=[])), - ("message", ReasoningMessage(content="Picking relevant events and properties", substeps=[])), - ("message", AssistantMessage(content="Need help with this query")), - ] - self.assertConversationEqual(output, expected_output) - snapshot: StateSnapshot = graph.get_state(config) - self.assertTrue(snapshot.next) - self.assertIn("intermediate_steps", snapshot.values) - - # Resume the graph from the interruption point. - message = """ - Thought: Finish. - Action: - ``` - { - "action": "final_answer", - "action_input": "Plan" - } - ``` - """ - mock.return_value = RunnableLambda(lambda _: messages.AIMessage(content=message)) - output = self._run_assistant_graph(graph, conversation=self.conversation, message="It's straightforward") - expected_output = [ - ("message", HumanMessage(content="It's straightforward")), - ("message", ReasoningMessage(content="Picking relevant events and properties", substeps=[])), - ("message", ReasoningMessage(content="Picking relevant events and properties", substeps=[])), - ] - self.assertConversationEqual(output, expected_output) - snapshot: StateSnapshot = graph.get_state(config) - self.assertFalse(snapshot.next) - self.assertEqual(snapshot.values.get("intermediate_steps"), []) - self.assertEqual(snapshot.values["plan"], "Plan") - - def test_trends_interrupt_when_asking_for_help(self): - graph = ( - AssistantGraph(self.team) - .add_edge(AssistantNodeName.START, AssistantNodeName.TRENDS_PLANNER) - .add_trends_planner(AssistantNodeName.END) - .compile() - ) - self._test_human_in_the_loop(graph) - - def test_funnels_interrupt_when_asking_for_help(self): - graph = ( - AssistantGraph(self.team) - .add_edge(AssistantNodeName.START, AssistantNodeName.FUNNEL_PLANNER) - .add_funnel_planner(AssistantNodeName.END) - .compile() - ) - self._test_human_in_the_loop(graph) - - def test_messages_are_updated_after_feedback(self): - with patch("ee.hogai.taxonomy_agent.nodes.TaxonomyAgentPlannerNode._model") as mock: - graph = ( - AssistantGraph(self.team) - .add_edge(AssistantNodeName.START, AssistantNodeName.TRENDS_PLANNER) - .add_trends_planner(AssistantNodeName.END) - .compile() - ) - config: RunnableConfig = { - "configurable": { - "thread_id": self.conversation.id, - } - } - - # Interrupt the graph - message = """ - Thought: Let's ask for help. 
- Action: - ``` - { - "action": "ask_user_for_help", - "action_input": "Need help with this query" - } - ``` - """ - mock.return_value = RunnableLambda(lambda _: messages.AIMessage(content=message)) - self._run_assistant_graph(graph, conversation=self.conversation) - snapshot: StateSnapshot = graph.get_state(config) - self.assertTrue(snapshot.next) - self.assertIn("intermediate_steps", snapshot.values) - self.assertEqual(len(snapshot.values["intermediate_steps"]), 1) - action, observation = snapshot.values["intermediate_steps"][0] - self.assertEqual(action.tool, "ask_user_for_help") - self.assertIsNone(observation) - self.assertNotIn("resumed", snapshot.values) - - self._run_assistant_graph(graph, conversation=self.conversation, message="It's straightforward") - snapshot: StateSnapshot = graph.get_state(config) - self.assertTrue(snapshot.next) - self.assertIn("intermediate_steps", snapshot.values) - self.assertEqual(len(snapshot.values["intermediate_steps"]), 2) - action, observation = snapshot.values["intermediate_steps"][0] - self.assertEqual(action.tool, "ask_user_for_help") - self.assertEqual(observation, "It's straightforward") - action, observation = snapshot.values["intermediate_steps"][1] - self.assertEqual(action.tool, "ask_user_for_help") - self.assertIsNone(observation) - self.assertFalse(snapshot.values["resumed"]) - - def test_resuming_uses_saved_state(self): - with patch("ee.hogai.taxonomy_agent.nodes.TaxonomyAgentPlannerNode._model") as mock: - graph = ( - AssistantGraph(self.team) - .add_edge(AssistantNodeName.START, AssistantNodeName.FUNNEL_PLANNER) - .add_funnel_planner(AssistantNodeName.END) - .compile() - ) - config: RunnableConfig = { - "configurable": { - "thread_id": self.conversation.id, - } - } - - # Interrupt the graph - message = """ - Thought: Let's ask for help. 
- Action: - ``` - { - "action": "ask_user_for_help", - "action_input": "Need help with this query" - } - ``` - """ - mock.return_value = RunnableLambda(lambda _: messages.AIMessage(content=message)) - - self._run_assistant_graph(graph, conversation=self.conversation) - state: StateSnapshot = graph.get_state(config).values - self.assertIn("start_id", state) - self.assertIsNotNone(state["start_id"]) - - self._run_assistant_graph(graph, conversation=self.conversation, message="It's straightforward") - state: StateSnapshot = graph.get_state(config).values - self.assertIn("start_id", state) - self.assertIsNotNone(state["start_id"]) - - def test_new_conversation_handles_serialized_conversation(self): - graph = ( - AssistantGraph(self.team) - .add_node(AssistantNodeName.ROUTER, lambda _: {"messages": [AssistantMessage(content="Hello")]}) - .add_edge(AssistantNodeName.START, AssistantNodeName.ROUTER) - .add_edge(AssistantNodeName.ROUTER, AssistantNodeName.END) - .compile() - ) - output = self._run_assistant_graph( - graph, - conversation=self.conversation, - is_new_conversation=True, - ) - expected_output = [ - ("conversation", {"id": str(self.conversation.id)}), - ] - self.assertConversationEqual(output[:1], expected_output) - - output = self._run_assistant_graph( - graph, - conversation=self.conversation, - is_new_conversation=False, - ) - self.assertNotEqual(output[0][0], "conversation") - - @pytest.mark.asyncio - async def test_async_stream(self): - graph = ( - AssistantGraph(self.team) - .add_node(AssistantNodeName.ROUTER, lambda _: {"messages": [AssistantMessage(content="bar")]}) - .add_edge(AssistantNodeName.START, AssistantNodeName.ROUTER) - .add_edge(AssistantNodeName.ROUTER, AssistantNodeName.END) - .compile() - ) - assistant = Assistant(self.team, self.conversation, HumanMessage(content="foo")) - assistant._graph = graph - - expected_output = [ - ("message", HumanMessage(content="foo")), - ("message", ReasoningMessage(content="Identifying type of analysis")), - ("message", AssistantMessage(content="bar")), - ] - actual_output = [self._parse_stringified_message(message) async for message in assistant._astream()] - self.assertConversationEqual(actual_output, expected_output) - - @pytest.mark.asyncio - async def test_async_stream_handles_exceptions(self): - def node_handler(state): - raise ValueError() - - graph = ( - AssistantGraph(self.team) - .add_node(AssistantNodeName.ROUTER, node_handler) - .add_edge(AssistantNodeName.START, AssistantNodeName.ROUTER) - .add_edge(AssistantNodeName.ROUTER, AssistantNodeName.END) - .compile() - ) - assistant = Assistant(self.team, self.conversation, HumanMessage(content="foo")) - assistant._graph = graph - - expected_output = [ - ("message", HumanMessage(content="foo")), - ("message", ReasoningMessage(content="Identifying type of analysis")), - ("message", FailureMessage()), - ] - actual_output = [] - with self.assertRaises(ValueError): - async for message in assistant._astream(): - actual_output.append(self._parse_stringified_message(message)) - self.assertConversationEqual(actual_output, expected_output) - - @patch("ee.hogai.summarizer.nodes.SummarizerNode._model") - @patch("ee.hogai.schema_generator.nodes.SchemaGeneratorNode._model") - @patch("ee.hogai.taxonomy_agent.nodes.TaxonomyAgentPlannerNode._model") - @patch("ee.hogai.router.nodes.RouterNode._model") - @patch("ee.hogai.memory.nodes.MemoryCollectorNode._model", return_value=messages.AIMessage(content="[Done]")) - def test_full_trends_flow(self, memory_collector_mock, router_mock, planner_mock, 
generator_mock, summarizer_mock): - router_mock.return_value = RunnableLambda(lambda _: RouterOutput(visualization_type="trends")) - planner_mock.return_value = RunnableLambda( - lambda _: messages.AIMessage( - content=""" - Thought: Done. - Action: - ``` - { - "action": "final_answer", - "action_input": "Plan" - } - ``` - """ - ) - ) - query = AssistantTrendsQuery(series=[]) - generator_mock.return_value = RunnableLambda(lambda _: TrendsSchemaGeneratorOutput(query=query)) - summarizer_mock.return_value = RunnableLambda(lambda _: AssistantMessage(content="Summary")) - - # First run - actual_output = self._run_assistant_graph(is_new_conversation=True) - expected_output = [ - ("conversation", {"id": str(self.conversation.id)}), - ("message", HumanMessage(content="Hello")), - ("message", ReasoningMessage(content="Identifying type of analysis")), - ("message", RouterMessage(content="trends")), - ("message", ReasoningMessage(content="Picking relevant events and properties", substeps=[])), - ("message", ReasoningMessage(content="Picking relevant events and properties", substeps=[])), - ("message", ReasoningMessage(content="Creating trends query")), - ("message", VisualizationMessage(answer=query, plan="Plan")), - ("message", AssistantMessage(content="Summary")), - ] - self.assertConversationEqual(actual_output, expected_output) - self.assertEqual(actual_output[1][1]["id"], actual_output[7][1]["initiator"]) - - # Second run - actual_output = self._run_assistant_graph(is_new_conversation=False) - self.assertConversationEqual(actual_output, expected_output[1:]) - self.assertEqual(actual_output[0][1]["id"], actual_output[6][1]["initiator"]) - - # Third run - actual_output = self._run_assistant_graph(is_new_conversation=False) - self.assertConversationEqual(actual_output, expected_output[1:]) - self.assertEqual(actual_output[0][1]["id"], actual_output[6][1]["initiator"]) - - @patch("ee.hogai.summarizer.nodes.SummarizerNode._model") - @patch("ee.hogai.schema_generator.nodes.SchemaGeneratorNode._model") - @patch("ee.hogai.taxonomy_agent.nodes.TaxonomyAgentPlannerNode._model") - @patch("ee.hogai.router.nodes.RouterNode._model") - @patch("ee.hogai.memory.nodes.MemoryCollectorNode._model", return_value=messages.AIMessage(content="[Done]")) - def test_full_funnel_flow(self, memory_collector_mock, router_mock, planner_mock, generator_mock, summarizer_mock): - router_mock.return_value = RunnableLambda(lambda _: RouterOutput(visualization_type="funnel")) - planner_mock.return_value = RunnableLambda( - lambda _: messages.AIMessage( - content=""" - Thought: Done. 
- Action: - ``` - { - "action": "final_answer", - "action_input": "Plan" - } - ``` - """ - ) - ) - query = AssistantFunnelsQuery( - series=[ - AssistantFunnelsEventsNode(event="$pageview"), - AssistantFunnelsEventsNode(event="$pageleave"), - ] - ) - generator_mock.return_value = RunnableLambda(lambda _: FunnelsSchemaGeneratorOutput(query=query)) - summarizer_mock.return_value = RunnableLambda(lambda _: AssistantMessage(content="Summary")) - - # First run - actual_output = self._run_assistant_graph(is_new_conversation=True) - expected_output = [ - ("conversation", {"id": str(self.conversation.id)}), - ("message", HumanMessage(content="Hello")), - ("message", ReasoningMessage(content="Identifying type of analysis")), - ("message", RouterMessage(content="funnel")), - ("message", ReasoningMessage(content="Picking relevant events and properties", substeps=[])), - ("message", ReasoningMessage(content="Picking relevant events and properties", substeps=[])), - ("message", ReasoningMessage(content="Creating funnel query")), - ("message", VisualizationMessage(answer=query, plan="Plan")), - ("message", AssistantMessage(content="Summary")), - ] - self.assertConversationEqual(actual_output, expected_output) - self.assertEqual(actual_output[1][1]["id"], actual_output[7][1]["initiator"]) - - # Second run - actual_output = self._run_assistant_graph(is_new_conversation=False) - self.assertConversationEqual(actual_output, expected_output[1:]) - self.assertEqual(actual_output[0][1]["id"], actual_output[6][1]["initiator"]) - - # Third run - actual_output = self._run_assistant_graph(is_new_conversation=False) - self.assertConversationEqual(actual_output, expected_output[1:]) - self.assertEqual(actual_output[0][1]["id"], actual_output[6][1]["initiator"]) - - @patch("ee.hogai.memory.nodes.MemoryInitializerInterruptNode._model") - @patch("ee.hogai.memory.nodes.MemoryInitializerNode._model") - def test_onboarding_flow_accepts_memory(self, model_mock, interruption_model_mock): - self._set_up_onboarding_tests() - - # Mock the memory initializer to return a product description - model_mock.return_value = RunnableLambda(lambda _: "PostHog is a product analytics platform.") - interruption_model_mock.return_value = RunnableLambda(lambda _: "PostHog is a product analytics platform.") - - # Create a graph with memory initialization flow - graph = AssistantGraph(self.team).add_memory_initializer(AssistantNodeName.END).compile() - - # First run - get the product description - output = self._run_assistant_graph(graph, is_new_conversation=True) - expected_output = [ - ("conversation", {"id": str(self.conversation.id)}), - ("message", HumanMessage(content="Hello")), - ( - "message", - AssistantMessage( - content=memory_prompts.SCRAPING_INITIAL_MESSAGE, - ), - ), - ("message", AssistantMessage(content="PostHog is a product analytics platform.")), - ("message", AssistantMessage(content=memory_prompts.SCRAPING_VERIFICATION_MESSAGE)), - ] - self.assertConversationEqual(output, expected_output) - - # Second run - accept the memory - output = self._run_assistant_graph( - graph, - message=memory_prompts.SCRAPING_CONFIRMATION_MESSAGE, - is_new_conversation=False, - ) - expected_output = [ - ("message", HumanMessage(content=memory_prompts.SCRAPING_CONFIRMATION_MESSAGE)), - ( - "message", - AssistantMessage(content=memory_prompts.SCRAPING_MEMORY_SAVED_MESSAGE), - ), - ("message", ReasoningMessage(content="Identifying type of analysis")), - ] - self.assertConversationEqual(output, expected_output) - - # Verify the memory was saved - 
core_memory = CoreMemory.objects.get(team=self.team) - self.assertEqual(core_memory.scraping_status, CoreMemory.ScrapingStatus.COMPLETED) - self.assertIsNotNone(core_memory.text) - - @patch("ee.hogai.memory.nodes.MemoryInitializerNode._model") - def test_onboarding_flow_rejects_memory(self, model_mock): - self._set_up_onboarding_tests() - - # Mock the memory initializer to return a product description - model_mock.return_value = RunnableLambda(lambda _: "PostHog is a product analytics platform.") - - # Create a graph with memory initialization flow - graph = AssistantGraph(self.team).add_memory_initializer(AssistantNodeName.END).compile() - - # First run - get the product description - output = self._run_assistant_graph(graph, is_new_conversation=True) - expected_output = [ - ("conversation", {"id": str(self.conversation.id)}), - ("message", HumanMessage(content="Hello")), - ( - "message", - AssistantMessage( - content=memory_prompts.SCRAPING_INITIAL_MESSAGE, - ), - ), - ("message", AssistantMessage(content="PostHog is a product analytics platform.")), - ("message", AssistantMessage(content=memory_prompts.SCRAPING_VERIFICATION_MESSAGE)), - ] - self.assertConversationEqual(output, expected_output) - - # Second run - reject the memory - output = self._run_assistant_graph( - graph, - message=memory_prompts.SCRAPING_REJECTION_MESSAGE, - is_new_conversation=False, - ) - expected_output = [ - ("message", HumanMessage(content=memory_prompts.SCRAPING_REJECTION_MESSAGE)), - ( - "message", - AssistantMessage( - content=memory_prompts.SCRAPING_TERMINATION_MESSAGE, - ), - ), - ("message", ReasoningMessage(content="Identifying type of analysis")), - ] - self.assertConversationEqual(output, expected_output) - - # Verify the memory was skipped - core_memory = CoreMemory.objects.get(team=self.team) - self.assertEqual(core_memory.scraping_status, CoreMemory.ScrapingStatus.SKIPPED) - self.assertEqual(core_memory.text, "") - - @patch("ee.hogai.memory.nodes.MemoryCollectorNode._model") - def test_memory_collector_flow(self, model_mock): - # Create a graph with just memory collection - graph = ( - AssistantGraph(self.team).add_memory_collector(AssistantNodeName.END).add_memory_collector_tools().compile() - ) - - # Mock the memory collector to first analyze and then append memory - def memory_collector_side_effect(prompt): - prompt_messages = prompt.to_messages() - if len(prompt_messages) == 2: # First run - return messages.AIMessage( - content="Let me analyze that.", - tool_calls=[ - { - "id": "1", - "name": "core_memory_append", - "args": {"memory_content": "The product uses a subscription model."}, - } - ], - ) - else: # Second run - return messages.AIMessage(content="Processing complete. 
[Done]") - - model_mock.return_value = RunnableLambda(memory_collector_side_effect) - - # First run - analyze and append memory - output = self._run_assistant_graph( - graph, - message="We use a subscription model", - is_new_conversation=True, - ) - expected_output = [ - ("conversation", {"id": str(self.conversation.id)}), - ("message", HumanMessage(content="We use a subscription model")), - ("message", AssistantMessage(content="Let me analyze that.")), - ("message", AssistantMessage(content="Memory appended.")), - ] - self.assertConversationEqual(output, expected_output) - - # Verify memory was appended - self.core_memory.refresh_from_db() - self.assertIn("The product uses a subscription model.", self.core_memory.text) diff --git a/ee/hogai/test/test_utils.py b/ee/hogai/test/test_utils.py deleted file mode 100644 index 8c32471c88..0000000000 --- a/ee/hogai/test/test_utils.py +++ /dev/null @@ -1,74 +0,0 @@ -from ee.hogai.utils.helpers import filter_messages -from posthog.schema import ( - AssistantMessage, - AssistantTrendsQuery, - FailureMessage, - HumanMessage, - RouterMessage, - VisualizationMessage, -) -from posthog.test.base import BaseTest - - -class TestTrendsUtils(BaseTest): - def test_filters_and_merges_human_messages(self): - conversation = [ - HumanMessage(content="Text"), - FailureMessage(content="Error"), - HumanMessage(content="Text"), - VisualizationMessage(answer=AssistantTrendsQuery(series=[]), plan="plan"), - HumanMessage(content="Text2"), - VisualizationMessage(answer=None, plan="plan"), - ] - messages = filter_messages(conversation) - self.assertEqual(len(messages), 4) - self.assertEqual( - [ - HumanMessage(content="Text\nText"), - VisualizationMessage(answer=AssistantTrendsQuery(series=[]), plan="plan"), - HumanMessage(content="Text2"), - VisualizationMessage(answer=None, plan="plan"), - ], - messages, - ) - - def test_filters_typical_conversation(self): - messages = filter_messages( - [ - HumanMessage(content="Question 1"), - RouterMessage(content="trends"), - VisualizationMessage(answer=AssistantTrendsQuery(series=[]), plan="Plan 1"), - AssistantMessage(content="Summary 1"), - HumanMessage(content="Question 2"), - RouterMessage(content="funnel"), - VisualizationMessage(answer=AssistantTrendsQuery(series=[]), plan="Plan 2"), - AssistantMessage(content="Summary 2"), - ] - ) - self.assertEqual(len(messages), 6) - self.assertEqual( - messages, - [ - HumanMessage(content="Question 1"), - VisualizationMessage(answer=AssistantTrendsQuery(series=[]), plan="Plan 1"), - AssistantMessage(content="Summary 1"), - HumanMessage(content="Question 2"), - VisualizationMessage(answer=AssistantTrendsQuery(series=[]), plan="Plan 2"), - AssistantMessage(content="Summary 2"), - ], - ) - - def test_joins_human_messages(self): - messages = filter_messages( - [ - HumanMessage(content="Question 1"), - HumanMessage(content="Question 2"), - ] - ) - self.assertEqual(len(messages), 1) - self.assertEqual( - messages, - [ - HumanMessage(content="Question 1\nQuestion 2"), - ], - ) diff --git a/ee/hogai/trends/__init__.py b/ee/hogai/trends/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/trends/nodes.py b/ee/hogai/trends/nodes.py deleted file mode 100644 index e430b4036e..0000000000 --- a/ee/hogai/trends/nodes.py +++ /dev/null @@ -1,50 +0,0 @@ -from langchain_core.prompts import ChatPromptTemplate -from langchain_core.runnables import RunnableConfig - -from ee.hogai.schema_generator.nodes import SchemaGeneratorNode, SchemaGeneratorToolsNode -from 
ee.hogai.schema_generator.utils import SchemaGeneratorOutput -from ee.hogai.taxonomy_agent.nodes import TaxonomyAgentPlannerNode, TaxonomyAgentPlannerToolsNode -from ee.hogai.trends.prompts import REACT_SYSTEM_PROMPT, TRENDS_SYSTEM_PROMPT -from ee.hogai.trends.toolkit import TRENDS_SCHEMA, TrendsTaxonomyAgentToolkit -from ee.hogai.utils.types import AssistantState, PartialAssistantState -from posthog.schema import AssistantTrendsQuery - - -class TrendsPlannerNode(TaxonomyAgentPlannerNode): - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - toolkit = TrendsTaxonomyAgentToolkit(self._team) - prompt = ChatPromptTemplate.from_messages( - [ - ("system", REACT_SYSTEM_PROMPT), - ], - template_format="mustache", - ) - return super()._run_with_prompt_and_toolkit(state, prompt, toolkit, config=config) - - -class TrendsPlannerToolsNode(TaxonomyAgentPlannerToolsNode): - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - toolkit = TrendsTaxonomyAgentToolkit(self._team) - return super()._run_with_toolkit(state, toolkit, config=config) - - -TrendsSchemaGeneratorOutput = SchemaGeneratorOutput[AssistantTrendsQuery] - - -class TrendsGeneratorNode(SchemaGeneratorNode[AssistantTrendsQuery]): - INSIGHT_NAME = "Trends" - OUTPUT_MODEL = TrendsSchemaGeneratorOutput - OUTPUT_SCHEMA = TRENDS_SCHEMA - - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState: - prompt = ChatPromptTemplate.from_messages( - [ - ("system", TRENDS_SYSTEM_PROMPT), - ], - template_format="mustache", - ) - return super()._run_with_prompt(state, prompt, config=config) - - -class TrendsGeneratorToolsNode(SchemaGeneratorToolsNode): - pass diff --git a/ee/hogai/trends/prompts.py b/ee/hogai/trends/prompts.py deleted file mode 100644 index b04f00c15b..0000000000 --- a/ee/hogai/trends/prompts.py +++ /dev/null @@ -1,193 +0,0 @@ -REACT_SYSTEM_PROMPT = """ -<agent_info> -You are an expert product analyst agent specializing in data visualization and trends analysis. Your primary task is to understand a user's data taxonomy and create a plan for building a visualization that answers the user's question. This plan should focus on trends insights, including a series of events, property filters, and values of property filters. - -{{core_memory_instructions}} - -{{react_format}} -</agent_info> - -<core_memory> -{{core_memory}} -</core_memory> - -{{react_human_in_the_loop}} - -Below you will find information on how to correctly discover the taxonomy of the user's data. - -<general_knowledge> -Trends insights enable users to plot data from people, events, and properties however they want. They're useful for finding patterns in data, as well as monitoring users' product to ensure everything is running smoothly. Users can use multiple independent series in a single query to see trends. They can also use a formula to calculate a metric. Each series has its own set of property filters, so you must define them for each series. Trends insights do not require breakdowns or filters by default. -</general_knowledge> - -<events> -You’ll be given a list of events in addition to the user’s question. Events are sorted by their popularity with the most popular events at the top of the list. Prioritize popular events. You must always specify events to use. Events always have an associated user’s profile. Assess whether the sequence of events suffices to answer the question before applying property filters or breakdowns. 
-</events> - -<aggregation> -**Determine the math aggregation** the user is asking for, such as totals, averages, ratios, or custom formulas. If not specified, choose a reasonable default based on the event type (e.g., total count). By default, the total count should be used. You can aggregate data by events, event's property values,{{#groups}} {{.}}s,{{/groups}} or users. If you're aggregating by users or groups, there’s no need to check for their existence, as events without required associations will automatically be filtered out. - -Available math aggregation types for the event count are: -- total count -- average -- minimum -- maximum -- median -- 90th percentile -- 95th percentile -- 99th percentile -- unique users -- weekly active users -- daily active users -- first time for a user -{{#groups}} -- unique {{.}}s -{{/groups}} - -Available math aggregation types for event's property values are: -- average -- sum -- minimum -- maximum -- median -- 90th percentile -- 95th percentile -- 99th percentile - -Available math aggregation types counting number of events completed per user (intensity of usage) are: -- average -- minimum -- maximum -- median -- 90th percentile -- 95th percentile -- 99th percentile - -Examples of using aggregation types: -- `unique users` to find how many distinct users have logged the event per day. -- `average` by the `$session_duration` property to find the average session duration of an event. -- `99th percentile by users` to find the 99th percentile of the event count by users. -</aggregation> - -<math_formulas> -If the math aggregation is more complex or not listed above, use custom formulas to perform mathematical operations like calculating percentages or metrics. If you use a formula, you must use the following syntax: `A/B`, where `A` and `B` are the names of the series. You can combine math aggregations and formulas. - -When using a formula, you must: -- Identify and specify **all** events needed to solve the formula. -- Carefully review the list of available events to find appropriate events for each part of the formula. -- Ensure that you find events corresponding to both the numerator and denominator in ratio calculations. - -Examples of using math formulas: -- If you want to calculate the percentage of users who have completed onboarding, you need to find and use events similar to `$identify` and `onboarding complete`, so the formula will be `A / B`, where `A` is `onboarding complete` (unique users) and `B` is `$identify` (unique users). -</math_formulas> - -{{react_property_filters}} - -<breakdowns> -Breakdowns are used to segment data by the values of at most three properties. They divide all defined trends series into multiple subseries based on the values of the property. Include breakdowns **only when they are essential to directly answer the user’s question**. You must not add breakdowns if the question can be addressed without additional segmentation. Always use the minimum set of breakdowns needed to answer the question. - -When using breakdowns, you must: -- **Identify the property group** and name for each breakdown. -- **Provide the property name** for each breakdown. -- **Validate that the property value accurately reflects the intended criteria**. - -Examples of using breakdowns: -- page views trend by country: you need to find a property such as `$geoip_country_code` and set it as a breakdown.
-- number of users who have completed onboarding by an organization: you need to find a property such as `organization name` and set it as a breakdown. -</breakdowns> - -<reminders> -- Ensure that any properties or breakdowns included are directly relevant to the context and objectives of the user’s question. Avoid unnecessary or unrelated details. -- Avoid overcomplicating the response with excessive property filters or breakdowns. Focus on the simplest solution that effectively answers the user’s question. -</reminders> ---- - -{{react_format_reminder}} -""" - -TRENDS_SYSTEM_PROMPT = """ -Act as an expert product manager. Your task is to generate a JSON schema of trends insights. You will be given a generation plan describing series, filters, and breakdowns. Use the plan and the following instructions to create a correct query answering the user's question. - -Below is the additional context. - -Follow these instructions to create a query: -* Build series according to the plan. The plan includes event, math types, property filters, and breakdowns. Properties can be of multiple types: String, Numeric, Bool, and DateTime. A property can be an array of those types, but it always has a single type. -* When evaluating filter operators, replace the `equals` or `doesn't equal` operators with `contains` or `doesn't contain` if the query value is likely a personal name, company name, or any other name-sensitive term where letter casing matters. For instance, if the value is ‘John Doe’ or ‘Acme Corp’, replace `equals` with `contains` and change the value to lowercase from `John Doe` to `john doe` or `Acme Corp` to `acme corp`. -* Determine the visualization type that best answers the user's question. -* Determine if the user wants to name the series or use the default names. -* Choose the date range and the interval the user wants to analyze. -* Determine if the user wants to compare the results to a previous period or use smoothing. -* Determine if the user wants to filter out internal and test users. If the user didn't specify, filter out internal and test users by default. -* Determine if the user wants to use a sampling factor. -* Determine if it's useful to show a legend, values of series, units, y-axis scale type, etc. -* Use your judgment if there are any other parameters that the user might want to adjust that aren't listed here. - -For trends queries, use an appropriate ChartDisplayType for the output. For example: -- if the user wants to see dynamics over time, like a line graph, use `ActionsLineGraph`. -- if the user wants to see cumulative dynamics across time, use `ActionsLineGraphCumulative`. -- if the user asks a question where you can answer with a single number, use `BoldNumber`. -- if the user wants a table, use `ActionsTable`. -- if the data is categorical, use `ActionsBar`. -- if the data is easy to understand in a pie chart, use `ActionsPie`. -- if the user has only one series and wants to see data from particular countries, use `WorldMap`. - -The user might want to get insights for groups. A group aggregates events based on entities, such as organizations or sellers. The user might provide a list of group names and their numeric indexes. Instead of a group's name, always use its numeric index. - -You can determine if a feature flag is enabled by checking if it's set to true or 1 in the `$feature/...` property. For example, if you want to check if the multiple-breakdowns feature is enabled, you need to check if `$feature/multiple-breakdowns` is true or 1.
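-
-For example (an illustrative sketch only, assuming the flag value is stored as the string "true" and using the `$pageview` event from the examples below), daily unique users who had the `$feature/multiple-breakdowns` flag enabled could look like:
-
-```
-{"dateRange":{"date_from":"-7d"},"interval":"day","kind":"TrendsQuery","series":[{"event":"$pageview","kind":"EventsNode","math":"dau","properties":[{"key":"$feature/multiple-breakdowns","operator":"exact","type":"event","value":["true"]}]}],"trendsFilter":{"display":"ActionsLineGraph"}}
-```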
- -## Schema Examples - -### How many users do I have? - -``` -{"dateRange":{"date_from":"all"},"interval":"month","kind":"TrendsQuery","series":[{"event":"user signed up","kind":"EventsNode","math":"total"}],"trendsFilter":{"display":"BoldNumber"}} -``` - -### Show a bar chart of the organic search traffic for the last month grouped by week. - -``` -{"dateRange":{"date_from":"-30d","date_to":null,"explicitDate":false},"interval":"week","kind":"TrendsQuery","series":[{"event":"$pageview","kind":"EventsNode","math":"dau","properties":[{"key":"$referring_domain","operator":"icontains","type":"event","value":"google"},{"key":"utm_source","operator":"is_not_set","type":"event","value":"is_not_set"}]}],"trendsFilter":{"display":"ActionsBar"}} -``` - -### insight created unique users & first-time users for the last 12m) - -``` -{"dateRange":{"date_from":"-12m","date_to":""},"filterTestAccounts":true,"interval":"month","kind":"TrendsQuery","series":[{"event":"insight created","kind":"EventsNode","math":"dau","custom_name":"insight created"},{"event":"insight created","kind":"EventsNode","math":"first_time_for_user","custom_name":"insight created"}],"trendsFilter":{"display":"ActionsLineGraph"}} -``` - -### What are the top 10 referring domains for the last month? - -``` -{"breakdownFilter":{"breakdown_type":"event","breakdowns":[{"group_type_index":null,"histogram_bin_count":null,"normalize_url":null,"property":"$referring_domain","type":"event"}]},"dateRange":{"date_from":"-30d"},"interval":"day","kind":"TrendsQuery","series":[{"event":"$pageview","kind":"EventsNode","math":"total","custom_name":"$pageview"}]} -``` - -### What is the DAU to MAU ratio of users from the US and Australia that viewed a page in the last 7 days? Compare it to the previous period. - -``` -{"compareFilter":{"compare":true,"compare_to":null},"dateRange":{"date_from":"-7d"},"interval":"day","kind":"TrendsQuery","properties":{"type":"AND","values":[{"type":"AND","values":[{"key":"$geoip_country_name","operator":"exact","type":"event","value":["United States","Australia"]}]}]},"series":[{"event":"$pageview","kind":"EventsNode","math":"dau","custom_name":"$pageview"},{"event":"$pageview","kind":"EventsNode","math":"monthly_active","custom_name":"$pageview"}],"trendsFilter":{"aggregationAxisFormat":"percentage_scaled","display":"ActionsLineGraph","formula":"A/B"}} -``` - -### I want to understand how old are dashboard results when viewed from the beginning of this year grouped by a month. Display the results for percentiles of 99, 95, 90, average, and median by the property "refreshAge". 
- -``` -{"dateRange":{"date_from":"yStart","date_to":null,"explicitDate":false},"filterTestAccounts":true,"interval":"month","kind":"TrendsQuery","series":[{"event":"viewed dashboard","kind":"EventsNode","math":"p99","math_property":"refreshAge","custom_name":"viewed dashboard"},{"event":"viewed dashboard","kind":"EventsNode","math":"p95","math_property":"refreshAge","custom_name":"viewed dashboard"},{"event":"viewed dashboard","kind":"EventsNode","math":"p90","math_property":"refreshAge","custom_name":"viewed dashboard"},{"event":"viewed dashboard","kind":"EventsNode","math":"avg","math_property":"refreshAge","custom_name":"viewed dashboard"},{"event":"viewed dashboard","kind":"EventsNode","math":"median","math_property":"refreshAge","custom_name":"viewed dashboard"}],"trendsFilter":{"aggregationAxisFormat":"duration","display":"ActionsLineGraph"}} -``` - -### organizations joined in the last 30 days by day from the google search - -``` -{"dateRange":{"date_from":"-30d"},"filterTestAccounts":false,"interval":"day","kind":"TrendsQuery","properties":{"type":"AND","values":[{"type":"OR","values":[{"key":"$initial_utm_source","operator":"exact","type":"person","value":["google"]}]}]},"series":[{"event":"user signed up","kind":"EventsNode","math":"unique_group","math_group_type_index":0,"name":"user signed up","properties":[{"key":"is_organization_first_user","operator":"exact","type":"person","value":["true"]}]}],"trendsFilter":{"display":"ActionsLineGraph"}} -``` - -### trends for the last two weeks of the onboarding completed event by unique projects with a session duration more than 5 minutes and the insight analyzed event by unique projects with a breakdown by event's Country Name. exclude the US. - -``` -{"kind":"TrendsQuery","series":[{"kind":"EventsNode","event":"onboarding completed","name":"onboarding completed","properties":[{"key":"$session_duration","value":300,"operator":"gt","type":"session"}],"math":"unique_group","math_group_type_index":2},{"kind":"EventsNode","event":"insight analyzed","name":"insight analyzed","math":"unique_group","math_group_type_index":2}],"trendsFilter":{"display":"ActionsBar","showValuesOnSeries":true,"showPercentStackView":false,"showLegend":false},"breakdownFilter":{"breakdowns":[{"property":"$geoip_country_name","type":"event"}],"breakdown_limit":5},"properties":{"type":"AND","values":[{"type":"AND","values":[{"key":"$geoip_country_code","value":["US"],"operator":"is_not","type":"event"}]}]},"dateRange":{"date_from":"-14d","date_to":null},"interval":"day"} -``` - -Obey these rules: -- if the date range is not specified, use the best judgment to select a reasonable date range. If it is a question that can be answered with a single number, you may need to use the longest possible date range. -- Filter internal users by default if the user doesn't specify. -- Only use events and properties defined by the user. You can't create new events or property definitions. - -Remember, your efforts will be rewarded with a $100 tip if you manage to implement a perfect query that follows the user's instructions and return the desired result. Do not hallucinate. 
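-
-As a brief illustration of the rule above about filtering internal and test users by default (a sketch only; the event and date range are just examples), the default is expressed with the top-level `filterTestAccounts` flag:
-
-```
-{"dateRange":{"date_from":"-30d"},"filterTestAccounts":true,"interval":"day","kind":"TrendsQuery","series":[{"event":"$pageview","kind":"EventsNode","math":"total"}],"trendsFilter":{"display":"ActionsLineGraph"}}
-```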
-""" diff --git a/ee/hogai/trends/test/__init__.py b/ee/hogai/trends/test/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/trends/test/test_nodes.py b/ee/hogai/trends/test/test_nodes.py deleted file mode 100644 index 004ab58408..0000000000 --- a/ee/hogai/trends/test/test_nodes.py +++ /dev/null @@ -1,44 +0,0 @@ -from unittest.mock import patch - -from django.test import override_settings -from langchain_core.runnables import RunnableLambda - -from ee.hogai.trends.nodes import TrendsGeneratorNode, TrendsSchemaGeneratorOutput -from ee.hogai.utils.types import AssistantState, PartialAssistantState -from posthog.schema import ( - AssistantTrendsQuery, - HumanMessage, - VisualizationMessage, -) -from posthog.test.base import APIBaseTest, ClickhouseTestMixin - - -@override_settings(IN_UNIT_TESTING=True) -class TestTrendsGeneratorNode(ClickhouseTestMixin, APIBaseTest): - maxDiff = None - - def setUp(self): - super().setUp() - self.schema = AssistantTrendsQuery(series=[]) - - def test_node_runs(self): - node = TrendsGeneratorNode(self.team) - with patch.object(TrendsGeneratorNode, "_model") as generator_model_mock: - generator_model_mock.return_value = RunnableLambda( - lambda _: TrendsSchemaGeneratorOutput(query=self.schema).model_dump() - ) - new_state = node.run( - AssistantState( - messages=[HumanMessage(content="Text")], - plan="Plan", - ), - {}, - ) - self.assertEqual( - new_state, - PartialAssistantState( - messages=[VisualizationMessage(answer=self.schema, plan="Plan", id=new_state.messages[0].id)], - intermediate_steps=[], - plan="", - ), - ) diff --git a/ee/hogai/trends/test/test_prompt.py b/ee/hogai/trends/test/test_prompt.py deleted file mode 100644 index f44fd46553..0000000000 --- a/ee/hogai/trends/test/test_prompt.py +++ /dev/null @@ -1,21 +0,0 @@ -from langchain_core.prompts import ChatPromptTemplate - -from ee.hogai.trends.prompts import REACT_SYSTEM_PROMPT -from posthog.test.base import BaseTest - - -class TestTrendsPrompts(BaseTest): - def test_planner_prompt_has_groups(self): - prompt = ChatPromptTemplate.from_messages( - [ - ("system", REACT_SYSTEM_PROMPT), - ], - template_format="mustache", - ).format( - groups=["org", "account"], - react_format="", - react_format_reminder="", - ) - self.assertIn("orgs, accounts,", prompt) - self.assertIn("unique orgs", prompt) - self.assertIn("unique accounts", prompt) diff --git a/ee/hogai/trends/toolkit.py b/ee/hogai/trends/toolkit.py deleted file mode 100644 index 5fd7a35f0f..0000000000 --- a/ee/hogai/trends/toolkit.py +++ /dev/null @@ -1,74 +0,0 @@ -from ee.hogai.taxonomy_agent.toolkit import TaxonomyAgentToolkit, ToolkitTool -from ee.hogai.utils.helpers import dereference_schema -from posthog.schema import AssistantTrendsQuery - - -class TrendsTaxonomyAgentToolkit(TaxonomyAgentToolkit): - def _get_tools(self) -> list[ToolkitTool]: - return [ - *self._default_tools, - { - "name": "final_answer", - "signature": "(final_response: str)", - "description": """ - Use this tool to provide the final answer to the user's question. - - Answer in the following format: - ``` - Events: - - event 1 - - math operation: total - - property filter 1: - - entity - - property name - - property type - - operator - - property value - - property filter 2... Repeat for each property filter. - - event 2 - - math operation: average by `property name`. - - property filter 1: - - entity - - property name - - property type - - operator - - property value - - property filter 2... Repeat for each property filter. 
- - Repeat for each event. - - (if a formula is used) - Formula: - `A/B`, where `A` is the first event and `B` is the second event. - - (if a breakdown is used) - Breakdown by: - - breakdown 1: - - entity - - property name - - Repeat for each breakdown. - ``` - - Args: - final_response: List all events and properties that you want to use to answer the question. - """, - }, - ] - - -def generate_trends_schema() -> dict: - schema = AssistantTrendsQuery.model_json_schema() - return { - "name": "output_insight_schema", - "description": "Outputs the JSON schema of a trends insight", - "parameters": { - "type": "object", - "properties": { - "query": dereference_schema(schema), - }, - "additionalProperties": False, - "required": ["query"], - }, - } - - -TRENDS_SCHEMA = generate_trends_schema() diff --git a/ee/hogai/utils/__init__.py b/ee/hogai/utils/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/hogai/utils/asgi.py b/ee/hogai/utils/asgi.py deleted file mode 100644 index a613ac8bb1..0000000000 --- a/ee/hogai/utils/asgi.py +++ /dev/null @@ -1,34 +0,0 @@ -from collections.abc import AsyncIterator, Callable, Iterable, Iterator -from typing import TypeVar - -from asgiref.sync import sync_to_async - -T = TypeVar("T") - - -class SyncIterableToAsync(AsyncIterator[T]): - def __init__(self, iterable: Iterable[T]) -> None: - self._iterable: Iterable[T] = iterable - # async versions of the `next` and `iter` functions - self.next_async: Callable = sync_to_async(self.next, thread_sensitive=False) - self.iter_async: Callable = sync_to_async(iter, thread_sensitive=False) - self.sync_iterator: Iterator[T] | None = None - - def __aiter__(self) -> AsyncIterator[T]: - return self - - async def __anext__(self) -> T: - if self.sync_iterator is None: - self.sync_iterator = await self.iter_async(self._iterable) - return await self.next_async(self.sync_iterator) - - @staticmethod - def next(it: Iterator[T]) -> T: - """ - asyncio expects `StopAsyncIteration` in place of `StopIteration`, - so here's a modified in-built `next` function that can handle this. - """ - try: - return next(it) - except StopIteration: - raise StopAsyncIteration diff --git a/ee/hogai/utils/helpers.py b/ee/hogai/utils/helpers.py deleted file mode 100644 index b09e439c6a..0000000000 --- a/ee/hogai/utils/helpers.py +++ /dev/null @@ -1,79 +0,0 @@ -from collections.abc import Sequence -from typing import Optional, TypeVar, Union - -from jsonref import replace_refs -from langchain_core.messages import ( - HumanMessage as LangchainHumanMessage, - merge_message_runs, -) - -from posthog.schema import ( - AssistantMessage, - HumanMessage, - VisualizationMessage, -) - -from .types import AssistantMessageUnion - - -def remove_line_breaks(line: str) -> str: - return line.replace("\n", " ") - - -def filter_messages( - messages: Sequence[AssistantMessageUnion], - entity_filter: Union[tuple[type[AssistantMessageUnion], ...], type[AssistantMessageUnion]] = ( - AssistantMessage, - VisualizationMessage, - ), -) -> list[AssistantMessageUnion]: - """ - Filters and merges the message history to be consumable by agents. Returns human and AI messages. 
- """ - stack: list[LangchainHumanMessage] = [] - filtered_messages: list[AssistantMessageUnion] = [] - - def _merge_stack(stack: list[LangchainHumanMessage]) -> list[HumanMessage]: - return [ - HumanMessage(content=langchain_message.content, id=langchain_message.id) - for langchain_message in merge_message_runs(stack) - ] - - for message in messages: - if isinstance(message, HumanMessage): - stack.append(LangchainHumanMessage(content=message.content, id=message.id)) - elif isinstance(message, entity_filter): - if stack: - filtered_messages += _merge_stack(stack) - stack = [] - filtered_messages.append(message) - - if stack: - filtered_messages += _merge_stack(stack) - - return filtered_messages - - -T = TypeVar("T", bound=AssistantMessageUnion) - - -def find_last_message_of_type(messages: Sequence[AssistantMessageUnion], message_type: type[T]) -> Optional[T]: - return next((msg for msg in reversed(messages) if isinstance(msg, message_type)), None) - - -def slice_messages_to_conversation_start( - messages: Sequence[AssistantMessageUnion], start_id: Optional[str] = None -) -> Sequence[AssistantMessageUnion]: - result = [] - for msg in messages: - result.append(msg) - if msg.id == start_id: - break - return result - - -def dereference_schema(schema: dict) -> dict: - new_schema: dict = replace_refs(schema, proxies=False, lazy_load=False) - if "$defs" in new_schema: - new_schema.pop("$defs") - return new_schema diff --git a/ee/hogai/utils/markdown.py b/ee/hogai/utils/markdown.py deleted file mode 100644 index 279fc17ffb..0000000000 --- a/ee/hogai/utils/markdown.py +++ /dev/null @@ -1,111 +0,0 @@ -from collections.abc import Sequence -from html.parser import HTMLParser -from inspect import getmembers, ismethod - -from markdown_it import MarkdownIt -from markdown_it.renderer import RendererProtocol -from markdown_it.token import Token -from markdown_it.utils import EnvType, OptionsDict - - -# Taken from https://github.com/elespike/mdit_plain/blob/main/src/mdit_plain/renderer.py -class HTMLTextRenderer(HTMLParser): - def __init__(self): - super().__init__() - self._handled_data = [] - - def handle_data(self, data): - self._handled_data.append(data) - - def reset(self): - self._handled_data = [] - super().reset() - - def render(self, html): - self.feed(html) - rendered_data = "".join(self._handled_data) - self.reset() - return rendered_data - - -class RendererPlain(RendererProtocol): - __output__ = "plain" - - def __init__(self, parser=None): - self.parser = parser - self.htmlparser = HTMLTextRenderer() - self.rules = { - func_name.replace("render_", ""): func - for func_name, func in getmembers(self, predicate=ismethod) - if func_name.startswith("render_") - } - - def render(self, tokens: Sequence[Token], options: OptionsDict, env: EnvType): - result = "" - for i, token in enumerate(tokens): - rule = self.rules.get(token.type, self.render_default) - result += rule(tokens, i, options, env) - if token.children is not None: - result += self.render(token.children, options, env) - return result.strip() - - def render_default(self, tokens, i, options, env): - return "" - - def render_bullet_list_close(self, tokens, i, options, env): - if (i + 1) == len(tokens) or "list" in tokens[i + 1].type: - return "" - return "\n" - - def render_code_block(self, tokens, i, options, env): - return f"\n{tokens[i].content}\n" - - def render_code_inline(self, tokens, i, options, env): - return tokens[i].content - - def render_fence(self, tokens, i, options, env): - return f"\n{tokens[i].content}\n" - - def 
render_hardbreak(self, tokens, i, options, env): - return "\n" - - def render_heading_close(self, tokens, i, options, env): - return "\n" - - def render_heading_open(self, tokens, i, options, env): - return "\n" - - def render_html_block(self, tokens, i, options, env): - return self.htmlparser.render(tokens[i].content) - - def render_list_item_open(self, tokens, i, options, env): - next_token = tokens[i + 1] - if hasattr(next_token, "hidden") and not next_token.hidden: - return "" - return "\n" - - def render_ordered_list_close(self, tokens, i, options, env): - if (i + 1) == len(tokens) or "list" in tokens[i + 1].type: - return "" - return "\n" - - def render_paragraph_close(self, tokens, i, options, env): - if tokens[i].hidden: - return "" - return "\n" - - def render_paragraph_open(self, tokens, i, options, env): - if tokens[i].hidden: - return "" - return "\n" - - def render_softbreak(self, tokens, i, options, env): - return "\n" - - def render_text(self, tokens, i, options, env): - return tokens[i].content - - -def remove_markdown(text: str) -> str: - parser = MarkdownIt(renderer_cls=RendererPlain) - return parser.render(text) diff --git a/ee/hogai/utils/nodes.py b/ee/hogai/utils/nodes.py deleted file mode 100644 index b727e643e3..0000000000 --- a/ee/hogai/utils/nodes.py +++ /dev/null @@ -1,32 +0,0 @@ -from abc import ABC, abstractmethod - -from langchain_core.runnables import RunnableConfig - -from ee.models.assistant import CoreMemory -from posthog.models.team.team import Team - -from .types import AssistantState, PartialAssistantState - - -class AssistantNode(ABC): - _team: Team - - def __init__(self, team: Team): - self._team = team - - @abstractmethod - def run(cls, state: AssistantState, config: RunnableConfig) -> PartialAssistantState | None: - raise NotImplementedError - - @property - def core_memory(self) -> CoreMemory | None: - try: - return CoreMemory.objects.get(team=self._team) - except CoreMemory.DoesNotExist: - return None - - @property - def core_memory_text(self) -> str: - if not self.core_memory: - return "" - return self.core_memory.formatted_text diff --git a/ee/hogai/utils/state.py b/ee/hogai/utils/state.py deleted file mode 100644 index 3392f3362a..0000000000 --- a/ee/hogai/utils/state.py +++ /dev/null @@ -1,70 +0,0 @@ -from typing import Any, Literal, TypedDict, TypeGuard, Union - -from langchain_core.messages import AIMessageChunk - -from ee.hogai.utils.types import AssistantNodeName, AssistantState, PartialAssistantState - -# A state update can have a partial state or a LangGraph's reserved dataclasses like Interrupt. -GraphValueUpdate = dict[AssistantNodeName, dict[Any, Any] | Any] - -GraphValueUpdateTuple = tuple[Literal["values"], GraphValueUpdate] - - -def is_value_update(update: list[Any]) -> TypeGuard[GraphValueUpdateTuple]: - """ - Transition between nodes. - - Returns: - PartialAssistantState, Interrupt, or other LangGraph reserved dataclasses. 
- """ - return len(update) == 2 and update[0] == "updates" - - -def validate_value_update(update: GraphValueUpdate) -> dict[AssistantNodeName, PartialAssistantState | Any]: - validated_update = {} - for node_name, value in update.items(): - if isinstance(value, dict): - validated_update[node_name] = PartialAssistantState.model_validate(value) - else: - validated_update[node_name] = value - return validated_update - - -class LangGraphState(TypedDict): - langgraph_node: AssistantNodeName - - -GraphMessageUpdateTuple = tuple[Literal["messages"], tuple[Union[AIMessageChunk, Any], LangGraphState]] - - -def is_message_update(update: list[Any]) -> TypeGuard[GraphMessageUpdateTuple]: - """ - Streaming of messages. - """ - return len(update) == 2 and update[0] == "messages" - - -GraphStateUpdateTuple = tuple[Literal["updates"], dict[Any, Any]] - - -def is_state_update(update: list[Any]) -> TypeGuard[GraphStateUpdateTuple]: - """ - Update of the state. Returns a full state. - """ - return len(update) == 2 and update[0] == "values" - - -def validate_state_update(state_update: dict[Any, Any]) -> AssistantState: - return AssistantState.model_validate(state_update) - - -GraphTaskStartedUpdateTuple = tuple[Literal["debug"], tuple[Union[AIMessageChunk, Any], LangGraphState]] - - -def is_task_started_update( - update: list[Any], -) -> TypeGuard[GraphTaskStartedUpdateTuple]: - """ - Streaming of messages. - """ - return len(update) == 2 and update[0] == "debug" and update[1]["type"] == "task" diff --git a/ee/hogai/utils/test/test_assistant_node.py b/ee/hogai/utils/test/test_assistant_node.py deleted file mode 100644 index 16946db36c..0000000000 --- a/ee/hogai/utils/test/test_assistant_node.py +++ /dev/null @@ -1,31 +0,0 @@ -from langchain_core.runnables import RunnableConfig - -from ee.hogai.utils.nodes import AssistantNode -from ee.hogai.utils.types import AssistantState, PartialAssistantState -from ee.models.assistant import CoreMemory -from posthog.test.base import BaseTest - - -class TestAssistantNode(BaseTest): - def setUp(self): - super().setUp() - - class Node(AssistantNode): - def run(self, state: AssistantState, config: RunnableConfig) -> PartialAssistantState | None: - raise NotImplementedError - - self.node = Node(self.team) - - def test_core_memory_when_exists(self): - core_memory = CoreMemory.objects.create(team=self.team, text="Test memory") - self.assertEqual(self.node.core_memory, core_memory) - - def test_core_memory_when_does_not_exist(self): - self.assertIsNone(self.node.core_memory) - - def test_product_core_memory_when_exists(self): - CoreMemory.objects.create(team=self.team, text="Test memory") - self.assertEqual(self.node.core_memory_text, "Test memory") - - def test_product_core_memory_when_does_not_exist(self): - self.assertEqual(self.node.core_memory_text, "") diff --git a/ee/hogai/utils/types.py b/ee/hogai/utils/types.py deleted file mode 100644 index 2b92ecdedc..0000000000 --- a/ee/hogai/utils/types.py +++ /dev/null @@ -1,74 +0,0 @@ -import operator -from collections.abc import Sequence -from enum import StrEnum -from typing import Annotated, Optional, Union - -from langchain_core.agents import AgentAction -from langchain_core.messages import BaseMessage as LangchainBaseMessage -from langgraph.graph import END, START -from pydantic import BaseModel, Field - -from posthog.schema import ( - AssistantMessage, - FailureMessage, - HumanMessage, - ReasoningMessage, - RouterMessage, - VisualizationMessage, -) - -AIMessageUnion = Union[AssistantMessage, VisualizationMessage, 
FailureMessage, RouterMessage, ReasoningMessage] -AssistantMessageUnion = Union[HumanMessage, AIMessageUnion] - - -class _SharedAssistantState(BaseModel): - intermediate_steps: Optional[list[tuple[AgentAction, Optional[str]]]] = Field(default=None) - start_id: Optional[str] = Field(default=None) - """ - The ID of the message from which the conversation started. - """ - plan: Optional[str] = Field(default=None) - resumed: Optional[bool] = Field(default=None) - """ - Whether the agent was resumed after interruption, such as a human in the loop. - """ - memory_updated: Optional[bool] = Field(default=None) - """ - Whether the memory was updated in the `MemoryCollectorNode`. - """ - memory_collection_messages: Optional[Sequence[LangchainBaseMessage]] = Field(default=None) - """ - The messages with tool calls to collect memory in the `MemoryCollectorToolsNode`. - """ - - -class AssistantState(_SharedAssistantState): - messages: Annotated[Sequence[AssistantMessageUnion], operator.add] - - -class PartialAssistantState(_SharedAssistantState): - messages: Optional[Sequence[AssistantMessageUnion]] = Field(default=None) - - -class AssistantNodeName(StrEnum): - START = START - END = END - MEMORY_ONBOARDING = "memory_onboarding" - MEMORY_INITIALIZER = "memory_initializer" - MEMORY_INITIALIZER_INTERRUPT = "memory_initializer_interrupt" - ROUTER = "router" - TRENDS_PLANNER = "trends_planner" - TRENDS_PLANNER_TOOLS = "trends_planner_tools" - TRENDS_GENERATOR = "trends_generator" - TRENDS_GENERATOR_TOOLS = "trends_generator_tools" - FUNNEL_PLANNER = "funnel_planner" - FUNNEL_PLANNER_TOOLS = "funnel_planner_tools" - FUNNEL_GENERATOR = "funnel_generator" - FUNNEL_GENERATOR_TOOLS = "funnel_generator_tools" - RETENTION_PLANNER = "retention_planner" - RETENTION_PLANNER_TOOLS = "retention_planner_tools" - RETENTION_GENERATOR = "retention_generator" - RETENTION_GENERATOR_TOOLS = "retention_generator_tools" - SUMMARIZER = "summarizer" - MEMORY_COLLECTOR = "memory_collector" - MEMORY_COLLECTOR_TOOLS = "memory_collector_tools" diff --git a/ee/management/commands/materialize_columns.py b/ee/management/commands/materialize_columns.py deleted file mode 100644 index 6d54f8362f..0000000000 --- a/ee/management/commands/materialize_columns.py +++ /dev/null @@ -1,111 +0,0 @@ -import argparse -import logging - -from django.core.management.base import BaseCommand - -from ee.clickhouse.materialized_columns.analyze import ( - logger, - materialize_properties_task, -) -from ee.clickhouse.materialized_columns.columns import DEFAULT_TABLE_COLUMN -from posthog.settings import ( - MATERIALIZE_COLUMNS_ANALYSIS_PERIOD_HOURS, - MATERIALIZE_COLUMNS_BACKFILL_PERIOD_DAYS, - MATERIALIZE_COLUMNS_MAX_AT_ONCE, - MATERIALIZE_COLUMNS_MINIMUM_QUERY_TIME, -) - - -class Command(BaseCommand): - help = "Materialize properties into columns in clickhouse" - - def add_arguments(self, parser): - parser.add_argument("--dry-run", action="store_true", help="Print plan instead of executing it") - - parser.add_argument( - "--property", - help="Properties to materialize. Skips analysis. 
Allows multiple arguments --property abc '$.abc.def'", - nargs="+", - ) - parser.add_argument( - "--property-table", - type=str, - default="events", - choices=["events", "person"], - help="Table of --property", - ) - parser.add_argument( - "--table-column", - help="The column from which --property should be materialized.", - default=DEFAULT_TABLE_COLUMN, - ) - parser.add_argument( - "--backfill-period", - type=int, - default=MATERIALIZE_COLUMNS_BACKFILL_PERIOD_DAYS, - help="How many days' worth of data to backfill. 0 to disable. Same as MATERIALIZE_COLUMNS_BACKFILL_PERIOD_DAYS env variable.", - ) - - parser.add_argument( - "--min-query-time", - type=int, - default=MATERIALIZE_COLUMNS_MINIMUM_QUERY_TIME, - help="Minimum query time (ms) before a query is considered for optimization. Same as MATERIALIZE_COLUMNS_MINIMUM_QUERY_TIME env variable.", - ) - parser.add_argument( - "--analyze-period", - type=int, - default=MATERIALIZE_COLUMNS_ANALYSIS_PERIOD_HOURS, - help="How long a time period to analyze. Same as MATERIALIZE_COLUMNS_ANALYSIS_PERIOD_HOURS env variable.", - ) - parser.add_argument( - "--analyze-team-id", - type=int, - default=None, - help="Analyze queries only for a specific team_id", - ) - parser.add_argument( - "--max-columns", - type=int, - default=MATERIALIZE_COLUMNS_MAX_AT_ONCE, - help="Max number of columns to materialize via a single invocation. Same as MATERIALIZE_COLUMNS_MAX_AT_ONCE env variable.", - ) - parser.add_argument( - "--nullable", - action=argparse.BooleanOptionalAction, - default=True, - dest="is_nullable", - ) - - def handle(self, *, is_nullable: bool, **options): - logger.setLevel(logging.INFO) - - if options["dry_run"]: - logger.warn("Dry run: No changes to the tables will be made!") - - if options.get("property"): - logger.info(f"Materializing column. 
table={options['property_table']}, property_name={options['property']}") - - materialize_properties_task( - properties_to_materialize=[ - ( - options["property_table"], - options["table_column"], - prop, - ) - for prop in options.get("property") - ], - backfill_period_days=options["backfill_period"], - dry_run=options["dry_run"], - is_nullable=is_nullable, - ) - else: - materialize_properties_task( - time_to_analyze_hours=options["analyze_period"], - maximum=options["max_columns"], - min_query_time=options["min_query_time"], - backfill_period_days=options["backfill_period"], - dry_run=options["dry_run"], - team_id_to_analyze=options["analyze_team_id"], - is_nullable=is_nullable, - ) diff --git a/ee/management/commands/update_materialized_column.py b/ee/management/commands/update_materialized_column.py deleted file mode 100644 index bb55a61545..0000000000 --- a/ee/management/commands/update_materialized_column.py +++ /dev/null @@ -1,31 +0,0 @@ -import logging - -from typing import Any -from collections.abc import Callable, Iterable -from django.core.management.base import BaseCommand, CommandParser - -from posthog.clickhouse.materialized_columns import ColumnName, TablesWithMaterializedColumns -from ee.clickhouse.materialized_columns.columns import update_column_is_disabled, drop_column - -logger = logging.getLogger(__name__) - -COLUMN_OPERATIONS: dict[str, Callable[[TablesWithMaterializedColumns, Iterable[ColumnName]], Any]] = { - "enable": lambda table, column_names: update_column_is_disabled(table, column_names, is_disabled=False), - "disable": lambda table, column_names: update_column_is_disabled(table, column_names, is_disabled=True), - "drop": drop_column, -} - - -class Command(BaseCommand): - def add_arguments(self, parser: CommandParser) -> None: - parser.add_argument("operation", choices=COLUMN_OPERATIONS.keys()) - parser.add_argument("table") - parser.add_argument("column_names", nargs="+", metavar="column") - - def handle( - self, operation: str, table: TablesWithMaterializedColumns, column_names: Iterable[ColumnName], **options - ): - logger.info("Running %r on %r for %r...", operation, table, column_names) - fn = COLUMN_OPERATIONS[operation] - fn(table, column_names) - logger.info("Success!") diff --git a/ee/migrations/0001_initial.py b/ee/migrations/0001_initial.py deleted file mode 100644 index 5b668bc772..0000000000 --- a/ee/migrations/0001_initial.py +++ /dev/null @@ -1,31 +0,0 @@ -# Generated by Django 3.0.7 on 2020-08-07 09:15 - - -from django.db import migrations, models - - -class Migration(migrations.Migration): - initial = True - - dependencies: list = [] - - operations = [ - migrations.CreateModel( - name="License", - fields=[ - ( - "id", - models.AutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ("created_at", models.DateTimeField(auto_now_add=True)), - ("plan", models.CharField(max_length=200)), - ("valid_until", models.DateTimeField()), - ("key", models.CharField(max_length=200)), - ], - ), - ] diff --git a/ee/migrations/0002_hook.py b/ee/migrations/0002_hook.py deleted file mode 100644 index 36516d048a..0000000000 --- a/ee/migrations/0002_hook.py +++ /dev/null @@ -1,59 +0,0 @@ -# Generated by Django 3.0.6 on 2020-08-18 12:10 - -import django.db.models.deletion -from django.conf import settings -from django.db import migrations, models - -import posthog.models.utils - - -class Migration(migrations.Migration): - dependencies = [ - ("posthog", "0082_personalapikey"), - 
migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ("ee", "0001_initial"), - ] - - operations = [ - migrations.CreateModel( - name="Hook", - fields=[ - ("created", models.DateTimeField(auto_now_add=True)), - ("updated", models.DateTimeField(auto_now=True)), - ( - "event", - models.CharField(db_index=True, max_length=64, verbose_name="Event"), - ), - ("target", models.URLField(max_length=255, verbose_name="Target URL")), - ( - "id", - models.CharField( - default=posthog.models.utils.generate_random_token, - max_length=50, - primary_key=True, - serialize=False, - ), - ), - ("resource_id", models.IntegerField(blank=True, null=True)), - ( - "team", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="rest_hooks", - to="posthog.Team", - ), - ), - ( - "user", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="rest_hooks", - to=settings.AUTH_USER_MODEL, - ), - ), - ], - options={ - "abstract": False, - }, - ), - ] diff --git a/ee/migrations/0003_license_max_users.py b/ee/migrations/0003_license_max_users.py deleted file mode 100644 index 6760baca0c..0000000000 --- a/ee/migrations/0003_license_max_users.py +++ /dev/null @@ -1,17 +0,0 @@ -# Generated by Django 3.0.11 on 2021-04-14 00:20 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("ee", "0002_hook"), - ] - - operations = [ - migrations.AddField( - model_name="license", - name="max_users", - field=models.IntegerField(default=None, null=True), - ), - ] diff --git a/ee/migrations/0004_enterpriseeventdefinition_enterprisepropertydefinition.py b/ee/migrations/0004_enterpriseeventdefinition_enterprisepropertydefinition.py deleted file mode 100644 index cd0d2b6b58..0000000000 --- a/ee/migrations/0004_enterpriseeventdefinition_enterprisepropertydefinition.py +++ /dev/null @@ -1,108 +0,0 @@ -# Generated by Django 3.1.8 on 2021-06-02 19:42 - -import django.contrib.postgres.fields -import django.db.models.deletion -from django.conf import settings -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("posthog", "0156_insight_short_id"), - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ("ee", "0003_license_max_users"), - ] - - operations = [ - migrations.CreateModel( - name="EnterprisePropertyDefinition", - fields=[ - ( - "propertydefinition_ptr", - models.OneToOneField( - auto_created=True, - on_delete=django.db.models.deletion.CASCADE, - parent_link=True, - primary_key=True, - serialize=False, - to="posthog.propertydefinition", - ), - ), - ("description", models.CharField(blank=True, max_length=400)), - ( - "tags", - django.contrib.postgres.fields.ArrayField( - base_field=models.CharField(max_length=32), - blank=True, - default=list, - null=True, - size=None, - ), - ), - ("updated_at", models.DateTimeField(auto_now=True)), - ( - "updated_by", - models.ForeignKey( - blank=True, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - to=settings.AUTH_USER_MODEL, - ), - ), - ], - options={ - "abstract": False, - }, - bases=("posthog.propertydefinition",), - ), - migrations.CreateModel( - name="EnterpriseEventDefinition", - fields=[ - ( - "eventdefinition_ptr", - models.OneToOneField( - auto_created=True, - on_delete=django.db.models.deletion.CASCADE, - parent_link=True, - primary_key=True, - serialize=False, - to="posthog.eventdefinition", - ), - ), - ("description", models.CharField(blank=True, max_length=400)), - ( - "tags", - 
django.contrib.postgres.fields.ArrayField( - base_field=models.CharField(max_length=32), - blank=True, - default=list, - null=True, - size=None, - ), - ), - ("updated_at", models.DateTimeField(auto_now=True)), - ( - "owner", - models.ForeignKey( - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="event_definitions", - to=settings.AUTH_USER_MODEL, - ), - ), - ( - "updated_by", - models.ForeignKey( - blank=True, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - to=settings.AUTH_USER_MODEL, - ), - ), - ], - options={ - "abstract": False, - }, - bases=("posthog.eventdefinition",), - ), - ] diff --git a/ee/migrations/0005_project_based_permissioning.py b/ee/migrations/0005_project_based_permissioning.py deleted file mode 100644 index d785637d17..0000000000 --- a/ee/migrations/0005_project_based_permissioning.py +++ /dev/null @@ -1,63 +0,0 @@ -# Generated by Django 3.2.5 on 2021-09-10 11:39 - -import django.db.models.deletion -from django.conf import settings -from django.db import migrations, models - -import posthog.models.utils - - -class Migration(migrations.Migration): - dependencies = [ - ("posthog", "0170_project_based_permissioning"), - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ("ee", "0004_enterpriseeventdefinition_enterprisepropertydefinition"), - ] - - operations = [ - migrations.CreateModel( - name="ExplicitTeamMembership", - fields=[ - ( - "id", - models.UUIDField( - default=posthog.models.utils.UUIDT, - editable=False, - primary_key=True, - serialize=False, - ), - ), - ( - "level", - models.PositiveSmallIntegerField(choices=[(1, "member"), (8, "administrator")], default=1), - ), - ("joined_at", models.DateTimeField(auto_now_add=True)), - ("updated_at", models.DateTimeField(auto_now=True)), - ( - "team", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="explicit_memberships", - related_query_name="explicit_membership", - to="posthog.team", - ), - ), - ( - "parent_membership", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="explicit_team_memberships", - related_query_name="explicit_team_membership", - to="posthog.organizationmembership", - ), - ), - ], - ), - migrations.AddConstraint( - model_name="explicitteammembership", - constraint=models.UniqueConstraint( - fields=("team", "parent_membership"), - name="unique_explicit_team_membership", - ), - ), - ] diff --git a/ee/migrations/0006_event_definition_verification.py b/ee/migrations/0006_event_definition_verification.py deleted file mode 100644 index c86f415d3f..0000000000 --- a/ee/migrations/0006_event_definition_verification.py +++ /dev/null @@ -1,36 +0,0 @@ -# Generated by Django 3.2.5 on 2022-01-17 20:13 - -import django.db.models.deletion -from django.conf import settings -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ("ee", "0005_project_based_permissioning"), - ] - - operations = [ - migrations.AddField( - model_name="enterpriseeventdefinition", - name="verified", - field=models.BooleanField(blank=True, default=False), - ), - migrations.AddField( - model_name="enterpriseeventdefinition", - name="verified_at", - field=models.DateTimeField(blank=True, null=True), - ), - migrations.AddField( - model_name="enterpriseeventdefinition", - name="verified_by", - field=models.ForeignKey( - blank=True, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - 
related_name="verifying_user", - to=settings.AUTH_USER_MODEL, - ), - ), - ] diff --git a/ee/migrations/0007_dashboard_permissions.py b/ee/migrations/0007_dashboard_permissions.py deleted file mode 100644 index 015498bfca..0000000000 --- a/ee/migrations/0007_dashboard_permissions.py +++ /dev/null @@ -1,67 +0,0 @@ -# Generated by Django 3.2.5 on 2022-01-31 20:50 - -import django.db.models.deletion -from django.conf import settings -from django.db import migrations, models - -import posthog.models.utils - - -class Migration(migrations.Migration): - dependencies = [ - ("posthog", "0203_dashboard_permissions"), - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ("ee", "0006_event_definition_verification"), - ] - - operations = [ - migrations.CreateModel( - name="DashboardPrivilege", - fields=[ - ( - "id", - models.UUIDField( - default=posthog.models.utils.UUIDT, - editable=False, - primary_key=True, - serialize=False, - ), - ), - ( - "level", - models.PositiveSmallIntegerField( - choices=[ - (21, "Everyone in the project can edit"), - (37, "Only those invited to this dashboard can edit"), - ] - ), - ), - ("added_at", models.DateTimeField(auto_now_add=True)), - ("updated_at", models.DateTimeField(auto_now=True)), - ( - "dashboard", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="privileges", - related_query_name="privilege", - to="posthog.dashboard", - ), - ), - ( - "user", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="explicit_dashboard_privileges", - related_query_name="explicit_dashboard_privilege", - to=settings.AUTH_USER_MODEL, - ), - ), - ], - ), - migrations.AddConstraint( - model_name="dashboardprivilege", - constraint=models.UniqueConstraint( - fields=("dashboard", "user"), name="unique_explicit_dashboard_privilege" - ), - ), - ] diff --git a/ee/migrations/0008_null_definition_descriptions.py b/ee/migrations/0008_null_definition_descriptions.py deleted file mode 100644 index 1172813b25..0000000000 --- a/ee/migrations/0008_null_definition_descriptions.py +++ /dev/null @@ -1,22 +0,0 @@ -# Generated by Django 3.2.5 on 2022-02-15 20:45 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("ee", "0007_dashboard_permissions"), - ] - - operations = [ - migrations.AlterField( - model_name="enterpriseeventdefinition", - name="description", - field=models.TextField(blank=True, default="", null=True), - ), - migrations.AlterField( - model_name="enterprisepropertydefinition", - name="description", - field=models.TextField(blank=True, default="", null=True), - ), - ] diff --git a/ee/migrations/0009_deprecated_old_tags.py b/ee/migrations/0009_deprecated_old_tags.py deleted file mode 100644 index c01f76cfd6..0000000000 --- a/ee/migrations/0009_deprecated_old_tags.py +++ /dev/null @@ -1,22 +0,0 @@ -# Generated by Django 3.2.5 on 2022-02-17 18:11 - -from django.db import migrations - - -class Migration(migrations.Migration): - dependencies = [ - ("ee", "0008_null_definition_descriptions"), - ] - - operations = [ - migrations.RenameField( - model_name="enterpriseeventdefinition", - old_name="tags", - new_name="deprecated_tags", - ), - migrations.RenameField( - model_name="enterprisepropertydefinition", - old_name="tags", - new_name="deprecated_tags", - ), - ] diff --git a/ee/migrations/0010_migrate_definitions_tags.py b/ee/migrations/0010_migrate_definitions_tags.py deleted file mode 100644 index 687d746044..0000000000 --- 
a/ee/migrations/0010_migrate_definitions_tags.py +++ /dev/null @@ -1,21 +0,0 @@ -# Generated by Django 3.2.5 on 2022-01-28 19:21 -from django.db import migrations - - -def forwards(apps, schema_editor): - pass - - -def reverse(apps, schema_editor): - pass - - -class Migration(migrations.Migration): - dependencies = [ - ("ee", "0009_deprecated_old_tags"), - ("posthog", "0213_deprecated_old_tags"), - ] - - operations = [ - migrations.RunPython(forwards, reverse), - ] diff --git a/ee/migrations/0011_add_tags_back.py b/ee/migrations/0011_add_tags_back.py deleted file mode 100644 index 0f5d2ff4f2..0000000000 --- a/ee/migrations/0011_add_tags_back.py +++ /dev/null @@ -1,35 +0,0 @@ -# Generated by Django 3.2.5 on 2022-02-18 18:22 - -import django.contrib.postgres.fields -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("ee", "0010_migrate_definitions_tags"), - ] - - operations = [ - migrations.AddField( - model_name="enterpriseeventdefinition", - name="tags", - field=django.contrib.postgres.fields.ArrayField( - base_field=models.CharField(max_length=32), - blank=True, - default=None, - null=True, - size=None, - ), - ), - migrations.AddField( - model_name="enterprisepropertydefinition", - name="tags", - field=django.contrib.postgres.fields.ArrayField( - base_field=models.CharField(max_length=32), - blank=True, - default=None, - null=True, - size=None, - ), - ), - ] diff --git a/ee/migrations/0012_migrate_tags_v2.py b/ee/migrations/0012_migrate_tags_v2.py deleted file mode 100644 index 540cd28133..0000000000 --- a/ee/migrations/0012_migrate_tags_v2.py +++ /dev/null @@ -1,143 +0,0 @@ -# Generated by Django 3.2.5 on 2022-03-02 22:44 -from typing import Any - -from django.core.paginator import Paginator -from django.db import migrations -from django.db.models import Q - -from posthog.models.tag import tagify - - -def forwards(apps, schema_editor): - import structlog - - logger = structlog.get_logger(__name__) - logger.info("ee/0012_migrate_tags_v2_start") - - Tag = apps.get_model("posthog", "Tag") - TaggedItem = apps.get_model("posthog", "TaggedItem") - EnterpriseEventDefinition = apps.get_model("ee", "EnterpriseEventDefinition") - EnterprisePropertyDefinition = apps.get_model("ee", "EnterprisePropertyDefinition") - - createables: list[tuple[Any, Any]] = [] - batch_size = 1_000 - - # Collect event definition tags and taggeditems - event_definition_paginator = Paginator( - EnterpriseEventDefinition.objects.exclude( - Q(deprecated_tags__isnull=True) | Q(deprecated_tags=[]), - ) - .order_by("created_at") - .values_list("deprecated_tags", "team_id", "id"), - batch_size, - ) - - for event_definition_page in event_definition_paginator.page_range: - logger.info( - "event_definition_tag_batch_get_start", - limit=batch_size, - offset=(event_definition_page - 1) * batch_size, - ) - event_definitions = iter(event_definition_paginator.get_page(event_definition_page)) - for tags, team_id, event_definition_id in event_definitions: - unique_tags = {tagify(t) for t in tags if isinstance(t, str) and t.strip() != ""} - for tag in unique_tags: - temp_tag = Tag(name=tag, team_id=team_id) - createables.append( - ( - temp_tag, - TaggedItem(event_definition_id=event_definition_id, tag_id=temp_tag.id), - ) - ) - - logger.info("event_definition_tag_get_end", tags_count=len(createables)) - num_event_definition_tags = len(createables) - - # Collect property definition tags and taggeditems - property_definition_paginator = Paginator( - 
EnterprisePropertyDefinition.objects.exclude( - Q(deprecated_tags__isnull=True) | Q(deprecated_tags=[]), - ) - .order_by("updated_at") - .values_list("deprecated_tags", "team_id", "id"), - batch_size, - ) - - for property_definition_page in property_definition_paginator.page_range: - logger.info( - "property_definition_tag_batch_get_start", - limit=batch_size, - offset=(property_definition_page - 1) * batch_size, - ) - property_definitions = iter(property_definition_paginator.get_page(property_definition_page)) - for tags, team_id, property_definition_id in property_definitions: - unique_tags = {tagify(t) for t in tags if isinstance(t, str) and t.strip() != ""} - for tag in unique_tags: - temp_tag = Tag(name=tag, team_id=team_id) - createables.append( - ( - temp_tag, - TaggedItem( - property_definition_id=property_definition_id, - tag_id=temp_tag.id, - ), - ) - ) - - logger.info( - "property_definition_tag_get_end", - tags_count=len(createables) - num_event_definition_tags, - ) - - # Consistent ordering to make independent runs deterministic - createables = sorted(createables, key=lambda pair: pair[0].name) - - # Attempts to create tags in bulk while ignoring conflicts. bulk_create does not return any data - # about which tags were ignored and created, so we must take care of this manually. - tags_to_create = [tag for (tag, _) in createables] - Tag.objects.bulk_create(tags_to_create, ignore_conflicts=True, batch_size=batch_size) - logger.info("tags_bulk_created") - - # Associate tag ids with tagged_item objects in batches. Best case scenario all tags are new. Worst case - # scenario, all tags already exist and a get is made for every tag. - for offset in range(0, len(tags_to_create), batch_size): - logger.info("tagged_item_batch_create_start", limit=batch_size, offset=offset) - batch = tags_to_create[offset : (offset + batch_size)] - - # Find tags that were created, and not already existing - created_tags = Tag.objects.in_bulk([t.id for t in batch]) - - # Tags that are in `tags_to_create` but not in `created_tags` are tags that already exist - # in the db and must be fetched individually. 
- createable_batch = createables[offset : (offset + batch_size)] - for tag, tagged_item in createable_batch: - if tag.id in created_tags: - tagged_item.tag_id = created_tags[tag.id].id - else: - tagged_item.tag_id = Tag.objects.filter(name=tag.name, team_id=tag.team_id).first().id - - # Create tag <-> item relationships, ignoring conflicts - TaggedItem.objects.bulk_create( - [tagged_item for (_, tagged_item) in createable_batch], - ignore_conflicts=True, - batch_size=batch_size, - ) - - logger.info("ee/0012_migrate_tags_v2_end") - - -def reverse(apps, schema_editor): - TaggedItem = apps.get_model("posthog", "TaggedItem") - TaggedItem.objects.filter(Q(event_definition_id__isnull=False) | Q(property_definition_id__isnull=False)).delete() - # Cascade deletes tag objects - - -class Migration(migrations.Migration): - atomic = False - - dependencies = [ - ("ee", "0011_add_tags_back"), - ("posthog", "0218_uniqueness_constraint_tagged_items"), - ] - - operations = [migrations.RunPython(forwards, reverse)] diff --git a/ee/migrations/0013_silence_deprecated_tags_warnings.py b/ee/migrations/0013_silence_deprecated_tags_warnings.py deleted file mode 100644 index c27f29ef35..0000000000 --- a/ee/migrations/0013_silence_deprecated_tags_warnings.py +++ /dev/null @@ -1,47 +0,0 @@ -# Generated by Django 3.2.13 on 2022-06-23 16:11 - -import django.contrib.postgres.fields -from django.db import migrations, models - - -class Migration(migrations.Migration): - dependencies = [ - ("ee", "0012_migrate_tags_v2"), - ] - - operations = [ - migrations.RenameField( - model_name="enterpriseeventdefinition", - old_name="tags", - new_name="deprecated_tags_v2", - ), - migrations.RenameField( - model_name="enterprisepropertydefinition", - old_name="tags", - new_name="deprecated_tags_v2", - ), - migrations.AlterField( - model_name="enterpriseeventdefinition", - name="deprecated_tags_v2", - field=django.contrib.postgres.fields.ArrayField( - base_field=models.CharField(max_length=32), - blank=True, - db_column="tags", - default=None, - null=True, - size=None, - ), - ), - migrations.AlterField( - model_name="enterprisepropertydefinition", - name="deprecated_tags_v2", - field=django.contrib.postgres.fields.ArrayField( - base_field=models.CharField(max_length=32), - blank=True, - db_column="tags", - default=None, - null=True, - size=None, - ), - ), - ] diff --git a/ee/migrations/0014_roles_memberships_and_resource_access.py b/ee/migrations/0014_roles_memberships_and_resource_access.py deleted file mode 100644 index dd5b0a7468..0000000000 --- a/ee/migrations/0014_roles_memberships_and_resource_access.py +++ /dev/null @@ -1,201 +0,0 @@ -# Generated by Django 3.2.16 on 2022-11-23 17:34 - -import django.db.models.deletion -from django.conf import settings -from django.db import migrations, models - -import posthog.models.utils - - -class Migration(migrations.Migration): - dependencies = [ - ("posthog", "0280_fix_async_deletion_team"), - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ("ee", "0013_silence_deprecated_tags_warnings"), - ] - - operations = [ - migrations.CreateModel( - name="Role", - fields=[ - ( - "id", - models.UUIDField( - default=posthog.models.utils.UUIDT, - editable=False, - primary_key=True, - serialize=False, - ), - ), - ("name", models.CharField(max_length=200)), - ( - "feature_flags_access_level", - models.PositiveSmallIntegerField( - choices=[(21, "Can only view"), (37, "Can always edit")], - default=37, - ), - ), - ("created_at", models.DateTimeField(auto_now_add=True)), - ( - "created_by", - 
models.ForeignKey( - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="roles", - related_query_name="role", - to=settings.AUTH_USER_MODEL, - ), - ), - ( - "organization", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="roles", - related_query_name="role", - to="posthog.organization", - ), - ), - ], - ), - migrations.CreateModel( - name="RoleMembership", - fields=[ - ( - "id", - models.UUIDField( - default=posthog.models.utils.UUIDT, - editable=False, - primary_key=True, - serialize=False, - ), - ), - ("joined_at", models.DateTimeField(auto_now_add=True)), - ("updated_at", models.DateTimeField(auto_now=True)), - ( - "role", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="roles", - related_query_name="role", - to="ee.role", - ), - ), - ( - "user", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="role_memberships", - related_query_name="role_membership", - to=settings.AUTH_USER_MODEL, - ), - ), - ], - ), - migrations.CreateModel( - name="OrganizationResourceAccess", - fields=[ - ( - "id", - models.AutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ( - "resource", - models.CharField( - choices=[ - ("feature flags", "feature flags"), - ("experiments", "experiments"), - ("cohorts", "cohorts"), - ("data management", "data management"), - ("session recordings", "session recordings"), - ("insights", "insights"), - ("dashboards", "dashboards"), - ], - max_length=32, - ), - ), - ( - "access_level", - models.PositiveSmallIntegerField( - choices=[(21, "Can only view"), (37, "Can always edit")], - default=37, - ), - ), - ("created_at", models.DateTimeField(auto_now_add=True)), - ("updated_at", models.DateTimeField(auto_now=True)), - ( - "created_by", - models.ForeignKey( - null=True, - on_delete=django.db.models.deletion.SET_NULL, - to=settings.AUTH_USER_MODEL, - ), - ), - ( - "organization", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="resource_access", - to="posthog.organization", - ), - ), - ], - ), - migrations.CreateModel( - name="FeatureFlagRoleAccess", - fields=[ - ( - "id", - models.AutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ("added_at", models.DateTimeField(auto_now_add=True)), - ("updated_at", models.DateTimeField(auto_now=True)), - ( - "feature_flag", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="access", - related_query_name="access", - to="posthog.featureflag", - ), - ), - ( - "role", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="feature_flag_access", - related_query_name="feature_flag_access", - to="ee.role", - ), - ), - ], - ), - migrations.AddConstraint( - model_name="rolemembership", - constraint=models.UniqueConstraint(fields=("role", "user"), name="unique_user_and_role"), - ), - migrations.AddConstraint( - model_name="role", - constraint=models.UniqueConstraint(fields=("organization", "name"), name="unique_role_name"), - ), - migrations.AddConstraint( - model_name="organizationresourceaccess", - constraint=models.UniqueConstraint( - fields=("organization", "resource"), - name="unique resource per organization", - ), - ), - migrations.AddConstraint( - model_name="featureflagroleaccess", - constraint=models.UniqueConstraint(fields=("role", "feature_flag"), name="unique_feature_flag_and_role"), - ), - ] diff --git 
a/ee/migrations/0015_add_verified_properties.py b/ee/migrations/0015_add_verified_properties.py deleted file mode 100644 index c61c980ba4..0000000000 --- a/ee/migrations/0015_add_verified_properties.py +++ /dev/null @@ -1,36 +0,0 @@ -# Generated by Django 3.2.18 on 2023-06-07 10:39 - -from django.conf import settings -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - dependencies = [ - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ("ee", "0014_roles_memberships_and_resource_access"), - ] - - operations = [ - migrations.AddField( - model_name="enterprisepropertydefinition", - name="verified", - field=models.BooleanField(blank=True, default=False), - ), - migrations.AddField( - model_name="enterprisepropertydefinition", - name="verified_at", - field=models.DateTimeField(blank=True, null=True), - ), - migrations.AddField( - model_name="enterprisepropertydefinition", - name="verified_by", - field=models.ForeignKey( - blank=True, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="property_verifying_user", - to=settings.AUTH_USER_MODEL, - ), - ), - ] diff --git a/ee/migrations/0016_rolemembership_organization_member.py b/ee/migrations/0016_rolemembership_organization_member.py deleted file mode 100644 index d366581f31..0000000000 --- a/ee/migrations/0016_rolemembership_organization_member.py +++ /dev/null @@ -1,25 +0,0 @@ -# Generated by Django 4.1.13 on 2024-03-14 13:40 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - dependencies = [ - ("posthog", "0397_projects_backfill"), - ("ee", "0015_add_verified_properties"), - ] - - operations = [ - migrations.AddField( - model_name="rolemembership", - name="organization_member", - field=models.ForeignKey( - null=True, - on_delete=django.db.models.deletion.CASCADE, - related_name="role_memberships", - related_query_name="role_membership", - to="posthog.organizationmembership", - ), - ), - ] diff --git a/ee/migrations/0017_accesscontrol_and_more.py b/ee/migrations/0017_accesscontrol_and_more.py deleted file mode 100644 index 1c870d3389..0000000000 --- a/ee/migrations/0017_accesscontrol_and_more.py +++ /dev/null @@ -1,75 +0,0 @@ -# Generated by Django 4.2.15 on 2024-11-07 17:05 - -from django.conf import settings -from django.db import migrations, models -import django.db.models.deletion -import posthog.models.utils - - -class Migration(migrations.Migration): - dependencies = [ - ("posthog", "0512_errortrackingissue_errortrackingissuefingerprintv2_and_more"), - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ("ee", "0016_rolemembership_organization_member"), - ] - - operations = [ - migrations.CreateModel( - name="AccessControl", - fields=[ - ( - "id", - models.UUIDField( - default=posthog.models.utils.UUIDT, editable=False, primary_key=True, serialize=False - ), - ), - ("access_level", models.CharField(max_length=32)), - ("resource", models.CharField(max_length=32)), - ("resource_id", models.CharField(max_length=36, null=True)), - ("created_at", models.DateTimeField(auto_now_add=True)), - ("updated_at", models.DateTimeField(auto_now=True)), - ( - "created_by", - models.ForeignKey( - null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL - ), - ), - ( - "organization_member", - models.ForeignKey( - null=True, - on_delete=django.db.models.deletion.CASCADE, - related_name="access_controls", - related_query_name="access_controls", - 
to="posthog.organizationmembership", - ), - ), - ( - "role", - models.ForeignKey( - null=True, - on_delete=django.db.models.deletion.CASCADE, - related_name="access_controls", - related_query_name="access_controls", - to="ee.role", - ), - ), - ( - "team", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="access_controls", - related_query_name="access_controls", - to="posthog.team", - ), - ), - ], - ), - migrations.AddConstraint( - model_name="accesscontrol", - constraint=models.UniqueConstraint( - fields=("resource", "resource_id", "team", "organization_member", "role"), - name="unique resource per target", - ), - ), - ] diff --git a/ee/migrations/0018_conversation_conversationcheckpoint_and_more.py b/ee/migrations/0018_conversation_conversationcheckpoint_and_more.py deleted file mode 100644 index ec48cc780a..0000000000 --- a/ee/migrations/0018_conversation_conversationcheckpoint_and_more.py +++ /dev/null @@ -1,147 +0,0 @@ -# Generated by Django 4.2.15 on 2024-12-11 15:51 - -from django.conf import settings -from django.db import migrations, models -import django.db.models.deletion -import posthog.models.utils - - -class Migration(migrations.Migration): - dependencies = [ - ("posthog", "0528_project_field_in_taxonomy"), - migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ("ee", "0017_accesscontrol_and_more"), - ] - - operations = [ - migrations.CreateModel( - name="Conversation", - fields=[ - ( - "id", - models.UUIDField( - default=posthog.models.utils.UUIDT, editable=False, primary_key=True, serialize=False - ), - ), - ("team", models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="posthog.team")), - ("user", models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), - ], - options={ - "abstract": False, - }, - ), - migrations.CreateModel( - name="ConversationCheckpoint", - fields=[ - ( - "id", - models.UUIDField( - default=posthog.models.utils.UUIDT, editable=False, primary_key=True, serialize=False - ), - ), - ( - "checkpoint_ns", - models.TextField( - default="", - help_text='Checkpoint namespace. Denotes the path to the subgraph node the checkpoint originates from, separated by `|` character, e.g. `"child|grandchild"`. Defaults to "" (root graph).', - ), - ), - ("checkpoint", models.JSONField(help_text="Serialized checkpoint data.", null=True)), - ("metadata", models.JSONField(help_text="Serialized checkpoint metadata.", null=True)), - ( - "parent_checkpoint", - models.ForeignKey( - help_text="Parent checkpoint ID.", - null=True, - on_delete=django.db.models.deletion.CASCADE, - related_name="children", - to="ee.conversationcheckpoint", - ), - ), - ( - "thread", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, related_name="checkpoints", to="ee.conversation" - ), - ), - ], - ), - migrations.CreateModel( - name="ConversationCheckpointWrite", - fields=[ - ( - "id", - models.UUIDField( - default=posthog.models.utils.UUIDT, editable=False, primary_key=True, serialize=False - ), - ), - ("task_id", models.UUIDField(help_text="Identifier for the task creating the checkpoint write.")), - ( - "idx", - models.IntegerField( - help_text="Index of the checkpoint write. It is an integer value where negative numbers are reserved for special cases, such as node interruption." - ), - ), - ( - "channel", - models.TextField( - help_text="An arbitrary string defining the channel name. For example, it can be a node name or a reserved LangGraph's enum." 
- ), - ), - ("type", models.TextField(help_text="Type of the serialized blob. For example, `json`.", null=True)), - ("blob", models.BinaryField(null=True)), - ( - "checkpoint", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="writes", - to="ee.conversationcheckpoint", - ), - ), - ], - ), - migrations.CreateModel( - name="ConversationCheckpointBlob", - fields=[ - ( - "id", - models.UUIDField( - default=posthog.models.utils.UUIDT, editable=False, primary_key=True, serialize=False - ), - ), - ( - "channel", - models.TextField( - help_text="An arbitrary string defining the channel name. For example, it can be a node name or a reserved LangGraph's enum." - ), - ), - ("version", models.TextField(help_text="Monotonically increasing version of the channel.")), - ("type", models.TextField(help_text="Type of the serialized blob. For example, `json`.", null=True)), - ("blob", models.BinaryField(null=True)), - ( - "checkpoint", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="blobs", - to="ee.conversationcheckpoint", - ), - ), - ], - ), - migrations.AddConstraint( - model_name="conversationcheckpointwrite", - constraint=models.UniqueConstraint( - fields=("checkpoint_id", "task_id", "idx"), name="unique_checkpoint_write" - ), - ), - migrations.AddConstraint( - model_name="conversationcheckpointblob", - constraint=models.UniqueConstraint( - fields=("checkpoint_id", "channel", "version"), name="unique_checkpoint_blob" - ), - ), - migrations.AddConstraint( - model_name="conversationcheckpoint", - constraint=models.UniqueConstraint(fields=("id", "checkpoint_ns", "thread"), name="unique_checkpoint"), - ), - ] diff --git a/ee/migrations/0019_remove_conversationcheckpointblob_unique_checkpoint_blob_and_more.py b/ee/migrations/0019_remove_conversationcheckpointblob_unique_checkpoint_blob_and_more.py deleted file mode 100644 index 377f85b3d2..0000000000 --- a/ee/migrations/0019_remove_conversationcheckpointblob_unique_checkpoint_blob_and_more.py +++ /dev/null @@ -1,38 +0,0 @@ -# Generated by Django 4.2.15 on 2024-12-19 11:00 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - dependencies = [ - ("ee", "0018_conversation_conversationcheckpoint_and_more"), - ] - - operations = [ - migrations.RemoveConstraint( - model_name="conversationcheckpointblob", - name="unique_checkpoint_blob", - ), - migrations.AddField( - model_name="conversationcheckpointblob", - name="checkpoint_ns", - field=models.TextField( - default="", - help_text='Checkpoint namespace. Denotes the path to the subgraph node the checkpoint originates from, separated by `|` character, e.g. `"child|grandchild"`. 
Defaults to "" (root graph).', - ), - ), - migrations.AddField( - model_name="conversationcheckpointblob", - name="thread", - field=models.ForeignKey( - null=True, on_delete=django.db.models.deletion.CASCADE, related_name="blobs", to="ee.conversation" - ), - ), - migrations.AddConstraint( - model_name="conversationcheckpointblob", - constraint=models.UniqueConstraint( - fields=("thread_id", "checkpoint_ns", "channel", "version"), name="unique_checkpoint_blob" - ), - ), - ] diff --git a/ee/migrations/0020_corememory.py b/ee/migrations/0020_corememory.py deleted file mode 100644 index a66baec6e5..0000000000 --- a/ee/migrations/0020_corememory.py +++ /dev/null @@ -1,45 +0,0 @@ -# Generated by Django 4.2.15 on 2024-12-20 15:14 - -from django.db import migrations, models -import django.db.models.deletion -import posthog.models.utils - - -class Migration(migrations.Migration): - dependencies = [ - ("posthog", "0535_alter_hogfunction_type"), - ("ee", "0019_remove_conversationcheckpointblob_unique_checkpoint_blob_and_more"), - ] - - operations = [ - migrations.CreateModel( - name="CoreMemory", - fields=[ - ( - "id", - models.UUIDField( - default=posthog.models.utils.UUIDT, editable=False, primary_key=True, serialize=False - ), - ), - ( - "text", - models.TextField(default="", help_text="Dumped core memory where facts are separated by newlines."), - ), - ("initial_text", models.TextField(default="", help_text="Scraped memory about the business.")), - ( - "scraping_status", - models.CharField( - blank=True, - choices=[("pending", "Pending"), ("completed", "Completed"), ("skipped", "Skipped")], - max_length=20, - null=True, - ), - ), - ("scraping_started_at", models.DateTimeField(null=True)), - ("team", models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, to="posthog.team")), - ], - options={ - "abstract": False, - }, - ), - ] diff --git a/ee/migrations/__init__.py b/ee/migrations/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/migrations/max_migration.txt b/ee/migrations/max_migration.txt deleted file mode 100644 index cd0433c401..0000000000 --- a/ee/migrations/max_migration.txt +++ /dev/null @@ -1 +0,0 @@ -0020_corememory diff --git a/ee/models/__init__.py b/ee/models/__init__.py deleted file mode 100644 index d1dfa7e8dc..0000000000 --- a/ee/models/__init__.py +++ /dev/null @@ -1,34 +0,0 @@ -from .assistant import ( - Conversation, - ConversationCheckpoint, - ConversationCheckpointBlob, - ConversationCheckpointWrite, - CoreMemory, -) -from .dashboard_privilege import DashboardPrivilege -from .event_definition import EnterpriseEventDefinition -from .explicit_team_membership import ExplicitTeamMembership -from .feature_flag_role_access import FeatureFlagRoleAccess -from .hook import Hook -from .license import License -from .property_definition import EnterprisePropertyDefinition -from .rbac.access_control import AccessControl -from .rbac.role import Role, RoleMembership - -__all__ = [ - "AccessControl", - "ConversationCheckpoint", - "ConversationCheckpointBlob", - "ConversationCheckpointWrite", - "CoreMemory", - "DashboardPrivilege", - "Conversation", - "EnterpriseEventDefinition", - "EnterprisePropertyDefinition", - "ExplicitTeamMembership", - "FeatureFlagRoleAccess", - "Hook", - "License", - "Role", - "RoleMembership", -] diff --git a/ee/models/assistant.py b/ee/models/assistant.py deleted file mode 100644 index 90ac31e339..0000000000 --- a/ee/models/assistant.py +++ /dev/null @@ -1,145 +0,0 @@ -from collections.abc import Iterable -from datetime 
import timedelta - -from django.db import models -from django.utils import timezone -from langgraph.checkpoint.serde.types import TASKS - -from posthog.models.team.team import Team -from posthog.models.user import User -from posthog.models.utils import UUIDModel - - -class Conversation(UUIDModel): - user = models.ForeignKey(User, on_delete=models.CASCADE) - team = models.ForeignKey(Team, on_delete=models.CASCADE) - - -class ConversationCheckpoint(UUIDModel): - thread = models.ForeignKey(Conversation, on_delete=models.CASCADE, related_name="checkpoints") - checkpoint_ns = models.TextField( - default="", - help_text='Checkpoint namespace. Denotes the path to the subgraph node the checkpoint originates from, separated by `|` character, e.g. `"child|grandchild"`. Defaults to "" (root graph).', - ) - parent_checkpoint = models.ForeignKey( - "self", null=True, on_delete=models.CASCADE, related_name="children", help_text="Parent checkpoint ID." - ) - checkpoint = models.JSONField(null=True, help_text="Serialized checkpoint data.") - metadata = models.JSONField(null=True, help_text="Serialized checkpoint metadata.") - - class Meta: - constraints = [ - models.UniqueConstraint( - fields=["id", "checkpoint_ns", "thread"], - name="unique_checkpoint", - ) - ] - - @property - def pending_sends(self) -> Iterable["ConversationCheckpointWrite"]: - if self.parent_checkpoint is None: - return [] - return self.parent_checkpoint.writes.filter(channel=TASKS).order_by("task_id", "idx") - - @property - def pending_writes(self) -> Iterable["ConversationCheckpointWrite"]: - return self.writes.order_by("idx", "task_id") - - -class ConversationCheckpointBlob(UUIDModel): - checkpoint = models.ForeignKey(ConversationCheckpoint, on_delete=models.CASCADE, related_name="blobs") - """ - The checkpoint that created the blob. Do not use this field to query blobs. - """ - thread = models.ForeignKey(Conversation, on_delete=models.CASCADE, related_name="blobs", null=True) - checkpoint_ns = models.TextField( - default="", - help_text='Checkpoint namespace. Denotes the path to the subgraph node the checkpoint originates from, separated by `|` character, e.g. `"child|grandchild"`. Defaults to "" (root graph).', - ) - channel = models.TextField( - help_text="An arbitrary string defining the channel name. For example, it can be a node name or a reserved LangGraph's enum." - ) - version = models.TextField(help_text="Monotonically increasing version of the channel.") - type = models.TextField(null=True, help_text="Type of the serialized blob. For example, `json`.") - blob = models.BinaryField(null=True) - - class Meta: - constraints = [ - models.UniqueConstraint( - fields=["thread_id", "checkpoint_ns", "channel", "version"], - name="unique_checkpoint_blob", - ) - ] - - -class ConversationCheckpointWrite(UUIDModel): - checkpoint = models.ForeignKey(ConversationCheckpoint, on_delete=models.CASCADE, related_name="writes") - task_id = models.UUIDField(help_text="Identifier for the task creating the checkpoint write.") - idx = models.IntegerField( - help_text="Index of the checkpoint write. It is an integer value where negative numbers are reserved for special cases, such as node interruption." - ) - channel = models.TextField( - help_text="An arbitrary string defining the channel name. For example, it can be a node name or a reserved LangGraph's enum." - ) - type = models.TextField(null=True, help_text="Type of the serialized blob. 
For example, `json`.") - blob = models.BinaryField(null=True) - - class Meta: - constraints = [ - models.UniqueConstraint( - fields=["checkpoint_id", "task_id", "idx"], - name="unique_checkpoint_write", - ) - ] - - -class CoreMemory(UUIDModel): - class ScrapingStatus(models.TextChoices): - PENDING = "pending", "Pending" - COMPLETED = "completed", "Completed" - SKIPPED = "skipped", "Skipped" - - team = models.OneToOneField(Team, on_delete=models.CASCADE) - text = models.TextField(default="", help_text="Dumped core memory where facts are separated by newlines.") - initial_text = models.TextField(default="", help_text="Scraped memory about the business.") - scraping_status = models.CharField(max_length=20, choices=ScrapingStatus.choices, blank=True, null=True) - scraping_started_at = models.DateTimeField(null=True) - - def change_status_to_pending(self): - self.scraping_started_at = timezone.now() - self.scraping_status = CoreMemory.ScrapingStatus.PENDING - self.save() - - def change_status_to_skipped(self): - self.scraping_status = CoreMemory.ScrapingStatus.SKIPPED - self.save() - - @property - def is_scraping_pending(self) -> bool: - return self.scraping_status == CoreMemory.ScrapingStatus.PENDING and ( - self.scraping_started_at is None or (self.scraping_started_at + timedelta(minutes=5)) > timezone.now() - ) - - @property - def is_scraping_finished(self) -> bool: - return self.scraping_status in [CoreMemory.ScrapingStatus.COMPLETED, CoreMemory.ScrapingStatus.SKIPPED] - - def set_core_memory(self, text: str): - self.text = text - self.initial_text = text - self.scraping_status = CoreMemory.ScrapingStatus.COMPLETED - self.save() - - def append_core_memory(self, text: str): - self.text = self.text + "\n" + text - self.save() - - def replace_core_memory(self, original_fragment: str, new_fragment: str): - if original_fragment not in self.text: - raise ValueError(f"Original fragment {original_fragment} not found in core memory") - self.text = self.text.replace(original_fragment, new_fragment) - self.save() - - @property - def formatted_text(self) -> str: - return self.text[0:5000] diff --git a/ee/models/dashboard_privilege.py b/ee/models/dashboard_privilege.py deleted file mode 100644 index 4dde1f4d13..0000000000 --- a/ee/models/dashboard_privilege.py +++ /dev/null @@ -1,30 +0,0 @@ -from django.db import models - -from posthog.models.dashboard import Dashboard -from posthog.models.utils import UUIDModel, sane_repr - - -# We call models that grant a user access to some resource (which isn't a grouping of users) a "privilege" -class DashboardPrivilege(UUIDModel): - dashboard = models.ForeignKey( - "posthog.Dashboard", - on_delete=models.CASCADE, - related_name="privileges", - related_query_name="privilege", - ) - user = models.ForeignKey( - "posthog.User", - on_delete=models.CASCADE, - related_name="explicit_dashboard_privileges", - related_query_name="explicit_dashboard_privilege", - ) - level = models.PositiveSmallIntegerField(choices=Dashboard.RestrictionLevel.choices) - added_at = models.DateTimeField(auto_now_add=True) - updated_at = models.DateTimeField(auto_now=True) - - class Meta: - constraints = [ - models.UniqueConstraint(fields=["dashboard", "user"], name="unique_explicit_dashboard_privilege") - ] - - __repr__ = sane_repr("dashboard", "user", "level") diff --git a/ee/models/event_definition.py b/ee/models/event_definition.py deleted file mode 100644 index fc172c4ac3..0000000000 --- a/ee/models/event_definition.py +++ /dev/null @@ -1,35 +0,0 @@ -from django.contrib.postgres.fields 
import ArrayField -from django.db import models - -from posthog.models.event_definition import EventDefinition - - -class EnterpriseEventDefinition(EventDefinition): - owner = models.ForeignKey( - "posthog.User", - null=True, - on_delete=models.SET_NULL, - related_name="event_definitions", - ) - description = models.TextField(blank=True, null=True, default="") - updated_at = models.DateTimeField(auto_now=True) - updated_by = models.ForeignKey("posthog.User", null=True, on_delete=models.SET_NULL, blank=True) - verified = models.BooleanField(default=False, blank=True) - verified_at = models.DateTimeField(null=True, blank=True) - verified_by = models.ForeignKey( - "posthog.User", - null=True, - on_delete=models.SET_NULL, - blank=True, - related_name="verifying_user", - ) - - # Deprecated in favour of app-wide tagging model. See EnterpriseTaggedItem - deprecated_tags: ArrayField = ArrayField(models.CharField(max_length=32), null=True, blank=True, default=list) - deprecated_tags_v2: ArrayField = ArrayField( - models.CharField(max_length=32), - null=True, - blank=True, - default=None, - db_column="tags", - ) diff --git a/ee/models/explicit_team_membership.py b/ee/models/explicit_team_membership.py deleted file mode 100644 index 35330a11bb..0000000000 --- a/ee/models/explicit_team_membership.py +++ /dev/null @@ -1,47 +0,0 @@ -from django.db import models - -from posthog.models.utils import UUIDModel, sane_repr -from posthog.models.organization import OrganizationMembership - - -# We call models that grant a user access to some grouping of users a "membership" -class ExplicitTeamMembership(UUIDModel): - class Level(models.IntegerChoices): - """Keep in sync with OrganizationMembership.Level (only difference being organizations having an Owner).""" - - MEMBER = 1, "member" - ADMIN = 8, "administrator" - - team = models.ForeignKey( - "posthog.Team", - on_delete=models.CASCADE, - related_name="explicit_memberships", - related_query_name="explicit_membership", - ) - parent_membership = models.ForeignKey( - "posthog.OrganizationMembership", - on_delete=models.CASCADE, - related_name="explicit_team_memberships", - related_query_name="explicit_team_membership", - ) - level = models.PositiveSmallIntegerField(default=Level.MEMBER, choices=Level.choices) - joined_at = models.DateTimeField(auto_now_add=True) - updated_at = models.DateTimeField(auto_now=True) - - class Meta: - constraints = [ - models.UniqueConstraint( - fields=["team", "parent_membership"], - name="unique_explicit_team_membership", - ) - ] - - def __str__(self): - return str(self.Level(self.level)) - - @property - def effective_level(self) -> "OrganizationMembership.Level": - """If organization level is higher than project level, then that takes precedence over explicit project level.""" - return max(self.level, self.parent_membership.level) - - __repr__ = sane_repr("team", "parent_membership", "level") diff --git a/ee/models/feature_flag_role_access.py b/ee/models/feature_flag_role_access.py deleted file mode 100644 index 867f2d562b..0000000000 --- a/ee/models/feature_flag_role_access.py +++ /dev/null @@ -1,21 +0,0 @@ -from django.db import models - - -class FeatureFlagRoleAccess(models.Model): - feature_flag = models.ForeignKey( - "posthog.FeatureFlag", - on_delete=models.CASCADE, - related_name="access", - related_query_name="access", - ) - role = models.ForeignKey( - "Role", - on_delete=models.CASCADE, - related_name="feature_flag_access", - related_query_name="feature_flag_access", - ) - added_at = models.DateTimeField(auto_now_add=True) 
- updated_at = models.DateTimeField(auto_now=True) - - class Meta: - constraints = [models.UniqueConstraint(fields=["role", "feature_flag"], name="unique_feature_flag_and_role")] diff --git a/ee/models/hook.py b/ee/models/hook.py deleted file mode 100644 index 7cfaf22b9f..0000000000 --- a/ee/models/hook.py +++ /dev/null @@ -1,47 +0,0 @@ -import json - -from django.core.exceptions import ValidationError -from django.db import models -from django.db.models.signals import post_delete, post_save -from django.dispatch.dispatcher import receiver - -from posthog.models.signals import mutable_receiver -from posthog.models.utils import generate_random_token -from posthog.redis import get_client - - -HOOK_EVENTS = ["action_performed"] - - -class Hook(models.Model): - id = models.CharField(primary_key=True, max_length=50, default=generate_random_token) - user = models.ForeignKey("posthog.User", related_name="rest_hooks", on_delete=models.CASCADE) - team = models.ForeignKey("posthog.Team", related_name="rest_hooks", on_delete=models.CASCADE) - event = models.CharField("Event", max_length=64, db_index=True) - resource_id = models.IntegerField(null=True, blank=True) - target = models.URLField("Target URL", max_length=255) - created = models.DateTimeField(auto_now_add=True) - updated = models.DateTimeField(auto_now=True) - - def clean(self): - """Validation for events.""" - if self.event not in HOOK_EVENTS: - raise ValidationError("Invalid hook event {evt}.".format(evt=self.event)) - - -@receiver(post_save, sender=Hook) -def hook_saved(sender, instance: Hook, created, **kwargs): - if instance.event == "action_performed": - get_client().publish( - "reload-action", - json.dumps({"teamId": instance.team_id, "actionId": instance.resource_id}), - ) - - -@mutable_receiver(post_delete, sender=Hook) -def hook_deleted(sender, instance: Hook, **kwargs): - if instance.event == "action_performed": - get_client().publish( - "drop-action", - json.dumps({"teamId": instance.team_id, "actionId": instance.resource_id}), - ) diff --git a/ee/models/license.py b/ee/models/license.py deleted file mode 100644 index 5a18d8f8c5..0000000000 --- a/ee/models/license.py +++ /dev/null @@ -1,119 +0,0 @@ -from typing import Optional - -from django.contrib.auth import get_user_model -from django.db import models -from django.db.models import Q -from django.db.models.signals import post_save -from django.dispatch.dispatcher import receiver -from django.utils import timezone -from rest_framework import exceptions, status - -from posthog.constants import AvailableFeature -from posthog.models.utils import sane_repr -from posthog.tasks.tasks import sync_all_organization_available_product_features - - -class LicenseError(exceptions.APIException): - """ - Exception raised for licensing errors. - """ - - default_type = "license_error" - default_code = "license_error" - status_code = status.HTTP_400_BAD_REQUEST - default_detail = "There was a problem with your current license." 
- - def __init__(self, code, detail): - self.code = code - self.detail = exceptions._get_error_details(detail, code) - - -class LicenseManager(models.Manager): - def first_valid(self) -> Optional["License"]: - """Return the highest valid license or cloud licenses if any""" - valid_licenses = list(self.filter(Q(valid_until__gte=timezone.now()) | Q(plan="cloud"))) - if not valid_licenses: - return None - return max( - valid_licenses, - key=lambda license: License.PLAN_TO_SORTING_VALUE.get(license.plan, 0), - ) - - -class License(models.Model): - objects: LicenseManager = LicenseManager() - - created_at = models.DateTimeField(auto_now_add=True) - plan = models.CharField(max_length=200) - valid_until = models.DateTimeField() - key = models.CharField(max_length=200) - # DEPRECATED: This is no longer used - max_users = models.IntegerField(default=None, null=True) # None = no restriction - - # NOTE: Remember to update the Billing Service as well. Long-term it will be the source of truth. - SCALE_PLAN = "scale" - SCALE_FEATURES = [ - AvailableFeature.ZAPIER, - AvailableFeature.ORGANIZATIONS_PROJECTS, - AvailableFeature.SOCIAL_SSO, - AvailableFeature.INGESTION_TAXONOMY, - AvailableFeature.PATHS_ADVANCED, - AvailableFeature.CORRELATION_ANALYSIS, - AvailableFeature.GROUP_ANALYTICS, - AvailableFeature.TAGGING, - AvailableFeature.BEHAVIORAL_COHORT_FILTERING, - AvailableFeature.WHITE_LABELLING, - AvailableFeature.SUBSCRIPTIONS, - AvailableFeature.APP_METRICS, - AvailableFeature.RECORDINGS_PLAYLISTS, - AvailableFeature.RECORDINGS_FILE_EXPORT, - AvailableFeature.RECORDINGS_PERFORMANCE, - ] - - ENTERPRISE_PLAN = "enterprise" - ENTERPRISE_FEATURES = [ - *SCALE_FEATURES, - AvailableFeature.ADVANCED_PERMISSIONS, - AvailableFeature.PROJECT_BASED_PERMISSIONING, - AvailableFeature.SAML, - AvailableFeature.SSO_ENFORCEMENT, - AvailableFeature.ROLE_BASED_ACCESS, - ] - PLANS = {SCALE_PLAN: SCALE_FEATURES, ENTERPRISE_PLAN: ENTERPRISE_FEATURES} - # The higher the plan, the higher its sorting value - sync with front-end licenseLogic - PLAN_TO_SORTING_VALUE = {SCALE_PLAN: 10, ENTERPRISE_PLAN: 20} - - @property - def available_features(self) -> list[AvailableFeature]: - return self.PLANS.get(self.plan, []) - - @property - def is_v2_license(self) -> bool: - return self.key and len(self.key.split("::")) == 2 - - __repr__ = sane_repr("key", "plan", "valid_until") - - -def get_licensed_users_available() -> Optional[int]: - """ - Returns the number of user slots available that can be created based on the instance's current license. - Not relevant for cloud users. - `None` means unlimited users. 
- """ - - license = License.objects.first_valid() - from posthog.models import OrganizationInvite - - if license: - if license.max_users is None: - return None - - users_left = license.max_users - get_user_model().objects.count() - OrganizationInvite.objects.count() - return max(users_left, 0) - - return None - - -@receiver(post_save, sender=License) -def license_saved(sender, instance, created, raw, using, **kwargs): - sync_all_organization_available_product_features() diff --git a/ee/models/property_definition.py b/ee/models/property_definition.py deleted file mode 100644 index 3354afacb4..0000000000 --- a/ee/models/property_definition.py +++ /dev/null @@ -1,31 +0,0 @@ -from django.contrib.postgres.fields import ArrayField -from django.db import models - -from posthog.models.property_definition import PropertyDefinition - - -class EnterprisePropertyDefinition(PropertyDefinition): - description = models.TextField(blank=True, null=True, default="") - updated_at = models.DateTimeField(auto_now=True) - updated_by = models.ForeignKey("posthog.User", null=True, on_delete=models.SET_NULL, blank=True) - - verified = models.BooleanField(default=False, blank=True) - verified_at = models.DateTimeField(null=True, blank=True) - - verified_by = models.ForeignKey( - "posthog.User", - null=True, - on_delete=models.SET_NULL, - blank=True, - related_name="property_verifying_user", - ) - - # Deprecated in favour of app-wide tagging model. See EnterpriseTaggedItem - deprecated_tags: ArrayField = ArrayField(models.CharField(max_length=32), null=True, blank=True, default=list) - deprecated_tags_v2: ArrayField = ArrayField( - models.CharField(max_length=32), - null=True, - blank=True, - default=None, - db_column="tags", - ) diff --git a/ee/models/rbac/access_control.py b/ee/models/rbac/access_control.py deleted file mode 100644 index 9566b4adab..0000000000 --- a/ee/models/rbac/access_control.py +++ /dev/null @@ -1,53 +0,0 @@ -from django.db import models - -from posthog.models.utils import UUIDModel - - -class AccessControl(UUIDModel): - class Meta: - constraints = [ - models.UniqueConstraint( - fields=["resource", "resource_id", "team", "organization_member", "role"], - name="unique resource per target", - ) - ] - - team = models.ForeignKey( - "posthog.Team", - on_delete=models.CASCADE, - related_name="access_controls", - related_query_name="access_controls", - ) - - # Configuration of what we are accessing - access_level: models.CharField = models.CharField(max_length=32) - resource: models.CharField = models.CharField(max_length=32) - resource_id: models.CharField = models.CharField(max_length=36, null=True) - - # Optional scope it to a specific member - organization_member = models.ForeignKey( - "posthog.OrganizationMembership", - on_delete=models.CASCADE, - related_name="access_controls", - related_query_name="access_controls", - null=True, - ) - - # Optional scope it to a specific role - role = models.ForeignKey( - "Role", - on_delete=models.CASCADE, - related_name="access_controls", - related_query_name="access_controls", - null=True, - ) - - created_by = models.ForeignKey( - "posthog.User", - on_delete=models.SET_NULL, - null=True, - ) - created_at: models.DateTimeField = models.DateTimeField(auto_now_add=True) - updated_at: models.DateTimeField = models.DateTimeField(auto_now=True) - - # TODO: add model validation for access_level and resource diff --git a/ee/models/rbac/organization_resource_access.py b/ee/models/rbac/organization_resource_access.py deleted file mode 100644 index 
de4c86d95a..0000000000 --- a/ee/models/rbac/organization_resource_access.py +++ /dev/null @@ -1,41 +0,0 @@ -from django.db import models - -from posthog.models.organization import Organization - -# NOTE: This will be deprecated in favour of the AccessControl model - - -class OrganizationResourceAccess(models.Model): - class AccessLevel(models.IntegerChoices): - """Level for which a role or user can edit or view resources""" - - CAN_ONLY_VIEW = 21, "Can only view" - CAN_ALWAYS_EDIT = 37, "Can always edit" - - class Resources(models.TextChoices): - FEATURE_FLAGS = "feature flags", "feature flags" - EXPERIMENTS = "experiments", "experiments" - COHORTS = "cohorts", "cohorts" - DATA_MANAGEMENT = "data management", "data management" - SESSION_RECORDINGS = "session recordings", "session recordings" - INSIGHTS = "insights", "insights" - DASHBOARDS = "dashboards", "dashboards" - - resource = models.CharField(max_length=32, choices=Resources.choices) - access_level = models.PositiveSmallIntegerField(default=AccessLevel.CAN_ALWAYS_EDIT, choices=AccessLevel.choices) - organization = models.ForeignKey(Organization, on_delete=models.CASCADE, related_name="resource_access") - created_by = models.ForeignKey( - "posthog.User", - on_delete=models.SET_NULL, - null=True, - ) - created_at = models.DateTimeField(auto_now_add=True) - updated_at = models.DateTimeField(auto_now=True) - - class Meta: - constraints = [ - models.UniqueConstraint( - fields=["organization", "resource"], - name="unique resource per organization", - ) - ] diff --git a/ee/models/rbac/role.py b/ee/models/rbac/role.py deleted file mode 100644 index cb35294da2..0000000000 --- a/ee/models/rbac/role.py +++ /dev/null @@ -1,61 +0,0 @@ -from django.db import models - -from ee.models.rbac.organization_resource_access import OrganizationResourceAccess -from posthog.models.utils import UUIDModel - - -class Role(UUIDModel): - class Meta: - constraints = [models.UniqueConstraint(fields=["organization", "name"], name="unique_role_name")] - - name = models.CharField(max_length=200) - organization = models.ForeignKey( - "posthog.Organization", - on_delete=models.CASCADE, - related_name="roles", - related_query_name="role", - ) - - created_at = models.DateTimeField(auto_now_add=True) - created_by = models.ForeignKey( - "posthog.User", - on_delete=models.SET_NULL, - related_name="roles", - related_query_name="role", - null=True, - ) - - # TODO: Deprecate this field - feature_flags_access_level = models.PositiveSmallIntegerField( - default=OrganizationResourceAccess.AccessLevel.CAN_ALWAYS_EDIT, - choices=OrganizationResourceAccess.AccessLevel.choices, - ) - - -class RoleMembership(UUIDModel): - class Meta: - constraints = [models.UniqueConstraint(fields=["role", "user"], name="unique_user_and_role")] - - role = models.ForeignKey( - "Role", - on_delete=models.CASCADE, - related_name="roles", - related_query_name="role", - ) - # TODO: Eventually remove this as we only need the organization membership - user = models.ForeignKey( - "posthog.User", - on_delete=models.CASCADE, - related_name="role_memberships", - related_query_name="role_membership", - ) - - organization_member = models.ForeignKey( - "posthog.OrganizationMembership", - on_delete=models.CASCADE, - related_name="role_memberships", - related_query_name="role_membership", - null=True, - ) - joined_at = models.DateTimeField(auto_now_add=True) - updated_at = models.DateTimeField(auto_now=True) diff --git a/ee/models/test/__init__.py b/ee/models/test/__init__.py deleted file mode 100644 index 
e69de29bb2..0000000000 diff --git a/ee/models/test/test_assistant.py b/ee/models/test/test_assistant.py deleted file mode 100644 index daf00499bb..0000000000 --- a/ee/models/test/test_assistant.py +++ /dev/null @@ -1,95 +0,0 @@ -from datetime import timedelta - -from django.utils import timezone -from freezegun import freeze_time - -from ee.models.assistant import CoreMemory -from posthog.test.base import BaseTest - - -class TestCoreMemory(BaseTest): - def setUp(self): - super().setUp() - self.core_memory = CoreMemory.objects.create(team=self.team) - - def test_status_changes(self): - # Test pending status - self.core_memory.change_status_to_pending() - self.assertEqual(self.core_memory.scraping_status, CoreMemory.ScrapingStatus.PENDING) - self.assertIsNotNone(self.core_memory.scraping_started_at) - - # Test skipped status - self.core_memory.change_status_to_skipped() - self.assertEqual(self.core_memory.scraping_status, CoreMemory.ScrapingStatus.SKIPPED) - - def test_scraping_status_properties(self): - # Test pending status within time window - self.core_memory.change_status_to_pending() - self.assertTrue(self.core_memory.is_scraping_pending) - - # Test pending status outside time window - self.core_memory.scraping_started_at = timezone.now() - timedelta(minutes=6) - self.core_memory.save() - self.assertFalse(self.core_memory.is_scraping_pending) - - # Test finished status - self.core_memory.scraping_status = CoreMemory.ScrapingStatus.COMPLETED - self.core_memory.save() - self.assertTrue(self.core_memory.is_scraping_finished) - - self.core_memory.scraping_status = CoreMemory.ScrapingStatus.SKIPPED - self.core_memory.save() - self.assertTrue(self.core_memory.is_scraping_finished) - - @freeze_time("2023-01-01 12:00:00") - def test_is_scraping_pending_timing(self): - # Set initial pending status - self.core_memory.change_status_to_pending() - initial_time = timezone.now() - - # Test 3 minutes after (should be true) - with freeze_time(initial_time + timedelta(minutes=3)): - self.assertTrue(self.core_memory.is_scraping_pending) - - # Test exactly 5 minutes after (should be false) - with freeze_time(initial_time + timedelta(minutes=5)): - self.assertFalse(self.core_memory.is_scraping_pending) - - # Test 6 minutes after (should be false) - with freeze_time(initial_time + timedelta(minutes=6)): - self.assertFalse(self.core_memory.is_scraping_pending) - - def test_core_memory_operations(self): - # Test setting core memory - test_text = "Test memory content" - self.core_memory.set_core_memory(test_text) - self.assertEqual(self.core_memory.text, test_text) - self.assertEqual(self.core_memory.initial_text, test_text) - self.assertEqual(self.core_memory.scraping_status, CoreMemory.ScrapingStatus.COMPLETED) - - # Test appending core memory - append_text = "Additional content" - self.core_memory.append_core_memory(append_text) - self.assertEqual(self.core_memory.text, f"{test_text}\n{append_text}") - - # Test replacing core memory - original = "content" - new = "memory" - self.core_memory.replace_core_memory(original, new) - self.assertIn(new, self.core_memory.text) - self.assertNotIn(original, self.core_memory.text) - - # Test replacing non-existent content - with self.assertRaises(ValueError): - self.core_memory.replace_core_memory("nonexistent", "new") - - def test_formatted_text(self): - # Test formatted text with short content - short_text = "Short text" - self.core_memory.set_core_memory(short_text) - self.assertEqual(self.core_memory.formatted_text, short_text) - - # Test formatted text with long 
content - long_text = "x" * 6000 - self.core_memory.set_core_memory(long_text) - self.assertEqual(len(self.core_memory.formatted_text), 5000) diff --git a/ee/models/test/test_event_definition_model.py b/ee/models/test/test_event_definition_model.py deleted file mode 100644 index 253de5d9c1..0000000000 --- a/ee/models/test/test_event_definition_model.py +++ /dev/null @@ -1,19 +0,0 @@ -import pytest - -from ee.models.event_definition import EnterpriseEventDefinition -from posthog.test.base import BaseTest - - -class TestEventDefinition(BaseTest): - def test_errors_on_invalid_verified_by_type(self): - with pytest.raises(ValueError): - EnterpriseEventDefinition.objects.create( - team=self.team, - name="enterprise event", - owner=self.user, - verified_by="Not user id", # type: ignore - ) - - def test_default_verified_false(self): - eventDef = EnterpriseEventDefinition.objects.create(team=self.team, name="enterprise event", owner=self.user) - assert eventDef.verified is False diff --git a/ee/models/test/test_property_definition_model.py b/ee/models/test/test_property_definition_model.py deleted file mode 100644 index 25ede95c04..0000000000 --- a/ee/models/test/test_property_definition_model.py +++ /dev/null @@ -1,18 +0,0 @@ -import pytest - -from ee.models.property_definition import EnterprisePropertyDefinition -from posthog.test.base import BaseTest - - -class TestPropertyDefinition(BaseTest): - def test_errors_on_invalid_verified_by_type(self): - with pytest.raises(ValueError): - EnterprisePropertyDefinition.objects.create( - team=self.team, - name="enterprise property", - verified_by="Not user id", # type: ignore - ) - - def test_default_verified_false(self): - definition = EnterprisePropertyDefinition.objects.create(team=self.team, name="enterprise property") - assert definition.verified is False diff --git a/ee/pytest.ini b/ee/pytest.ini deleted file mode 100644 index 4af882084e..0000000000 --- a/ee/pytest.ini +++ /dev/null @@ -1,11 +0,0 @@ -[pytest] -env = - DEBUG=1 - TEST=1 -DJANGO_SETTINGS_MODULE = posthog.settings -addopts = -p no:warnings --reuse-db - -markers = - ee - clickhouse_only - skip_on_multitenancy diff --git a/ee/session_recordings/__init__.py b/ee/session_recordings/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/session_recordings/ai/utils.py b/ee/session_recordings/ai/utils.py deleted file mode 100644 index 38ef49cfdb..0000000000 --- a/ee/session_recordings/ai/utils.py +++ /dev/null @@ -1,179 +0,0 @@ -import dataclasses -from datetime import datetime - -from typing import Any - - -@dataclasses.dataclass -class SessionSummaryPromptData: - # we may allow customisation of columns included in the future, - # and we alter the columns present as we process the data - # so want to stay as loose as possible here - columns: list[str] = dataclasses.field(default_factory=list) - results: list[list[Any]] = dataclasses.field(default_factory=list) - # in order to reduce the number of tokens in the prompt - # we replace URLs with a placeholder and then pass this mapping of placeholder to URL into the prompt - url_mapping: dict[str, str] = dataclasses.field(default_factory=dict) - - # one for each result in results - processed_elements_chain: list[dict] = dataclasses.field(default_factory=list) - - def is_empty(self) -> bool: - return not self.columns or not self.results - - def column_index(self, column: str) -> int | None: - for i, c in enumerate(self.columns): - if c == column: - return i - return None - - -def simplify_window_id(session_events: 
SessionSummaryPromptData) -> SessionSummaryPromptData: - if session_events.is_empty(): - return session_events - - # find window_id column index - window_id_index = session_events.column_index("$window_id") - - window_id_mapping: dict[str, int] = {} - simplified_results = [] - for result in session_events.results: - if window_id_index is None: - simplified_results.append(result) - continue - - window_id: str | None = result[window_id_index] - if not window_id: - simplified_results.append(result) - continue - - if window_id not in window_id_mapping: - window_id_mapping[window_id] = len(window_id_mapping) + 1 - - result_list = list(result) - result_list[window_id_index] = window_id_mapping[window_id] - simplified_results.append(result_list) - - return dataclasses.replace(session_events, results=simplified_results) - - -def deduplicate_urls(session_events: SessionSummaryPromptData) -> SessionSummaryPromptData: - if session_events.is_empty(): - return session_events - - # find url column index - url_index = session_events.column_index("$current_url") - - url_mapping: dict[str, str] = {} - deduplicated_results = [] - for result in session_events.results: - if url_index is None: - deduplicated_results.append(result) - continue - - url: str | None = result[url_index] - if not url: - deduplicated_results.append(result) - continue - - if url not in url_mapping: - url_mapping[url] = f"url_{len(url_mapping) + 1}" - - result_list = list(result) - result_list[url_index] = url_mapping[url] - deduplicated_results.append(result_list) - - return dataclasses.replace(session_events, results=deduplicated_results, url_mapping=url_mapping) - - -def format_dates(session_events: SessionSummaryPromptData, start: datetime) -> SessionSummaryPromptData: - if session_events.is_empty(): - return session_events - - # find timestamp column index - timestamp_index = session_events.column_index("timestamp") - - if timestamp_index is None: - # no timestamp column so nothing to do - return session_events - - del session_events.columns[timestamp_index] # remove timestamp column from columns - session_events.columns.append("milliseconds_since_start") # add new column to columns at end - - formatted_results = [] - for result in session_events.results: - timestamp: datetime | None = result[timestamp_index] - if not timestamp: - formatted_results.append(result) - continue - - result_list = list(result) - # remove list item at timestamp_index - del result_list[timestamp_index] - # insert milliseconds since reference date - result_list.append(int((timestamp - start).total_seconds() * 1000)) - formatted_results.append(result_list) - - return dataclasses.replace(session_events, results=formatted_results) - - -def collapse_sequence_of_events(session_events: SessionSummaryPromptData) -> SessionSummaryPromptData: - # assumes the list is ordered by timestamp - if session_events.is_empty(): - return session_events - - # find the event column index - event_index = session_events.column_index("event") - - # find the window id column index - window_id_index = session_events.column_index("$window_id") - - event_repetition_count_index: int | None = None - # we only append this new column, if we need to add it below - - # now enumerate the results finding sequences of events with the same event and collapsing them to a single item - collapsed_results = [] - for i, result in enumerate(session_events.results): - if event_index is None: - collapsed_results.append(result) - continue - - event: str | None = result[event_index] - if not event: - 
collapsed_results.append(result) - continue - - if i == 0: - collapsed_results.append(result) - continue - - # we need to collapse into the last item added into collapsed results - # as we're going to amend it in place - previous_result = collapsed_results[len(collapsed_results) - 1] - previous_event: str | None = previous_result[event_index] - if not previous_event: - collapsed_results.append(result) - continue - - event_matches = previous_event == event - window_matches = previous_result[window_id_index] == result[window_id_index] if window_id_index else True - - if event_matches and window_matches: - # collapse the event into the previous result - if event_repetition_count_index is None: - # we need to add the column - event_repetition_count_index = len(session_events.columns) - session_events.columns.append("event_repetition_count") - previous_result_list = list(previous_result) - try: - existing_repetition_count = previous_result_list[event_repetition_count_index] or 0 - previous_result_list[event_repetition_count_index] = existing_repetition_count + 1 - except IndexError: - previous_result_list.append(2) - - collapsed_results[len(collapsed_results) - 1] = previous_result_list - else: - result.append(None) # there is no event repetition count - collapsed_results.append(result) - - return dataclasses.replace(session_events, results=collapsed_results) diff --git a/ee/session_recordings/persistence_tasks.py b/ee/session_recordings/persistence_tasks.py deleted file mode 100644 index b9181e361b..0000000000 --- a/ee/session_recordings/persistence_tasks.py +++ /dev/null @@ -1,42 +0,0 @@ -from datetime import timedelta - -import structlog -from celery import shared_task -from django.utils import timezone -from prometheus_client import Counter - -from ee.session_recordings.session_recording_extensions import persist_recording -from posthog.session_recordings.models.session_recording import SessionRecording -from posthog.tasks.utils import CeleryQueue - -logger = structlog.get_logger(__name__) - -REPLAY_NEEDS_PERSISTENCE_COUNTER = Counter( - "snapshot_persist_persistence_task_queued", - "Count of session recordings that need to be persisted", - # we normally avoid team label but not all teams pin recordings so there shouldn't be _too_ many labels here - labelnames=["team_id"], -) - - -@shared_task( - ignore_result=True, - queue=CeleryQueue.SESSION_REPLAY_PERSISTENCE.value, -) -def persist_single_recording(id: str, team_id: int) -> None: - persist_recording(id, team_id) - - -@shared_task( - ignore_result=True, - queue=CeleryQueue.SESSION_REPLAY_PERSISTENCE.value, -) -def persist_finished_recordings() -> None: - one_day_old = timezone.now() - timedelta(hours=24) - finished_recordings = SessionRecording.objects.filter(created_at__lte=one_day_old, object_storage_path=None) - - logger.info("Persisting finished recordings", count=finished_recordings.count()) - - for recording in finished_recordings: - REPLAY_NEEDS_PERSISTENCE_COUNTER.labels(team_id=recording.team_id).inc() - persist_single_recording.delay(recording.session_id, recording.team_id) diff --git a/ee/session_recordings/queries/__init__.py b/ee/session_recordings/queries/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/session_recordings/queries/test/__init__.py b/ee/session_recordings/queries/test/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_query.ambr 
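The four helpers removed from ee/session_recordings/ai/utils.py are small transformations over SessionSummaryPromptData and were designed to be chained: window IDs become small integers, URLs become url_N placeholders (the mapping is kept for the prompt), the timestamp column becomes milliseconds_since_start relative to a reference time, and consecutive identical events within the same window collapse into one row carrying an event_repetition_count. A minimal sketch of that pipeline, assuming the module above is still importable; the sample rows, URLs and dates are illustrative:

# Minimal sketch of the prompt-data pipeline as implemented in the removed module.
from datetime import datetime, timedelta

from ee.session_recordings.ai.utils import (
    SessionSummaryPromptData,
    collapse_sequence_of_events,
    deduplicate_urls,
    format_dates,
    simplify_window_id,
)

start = datetime(2024, 1, 1, 12, 0, 0)
data = SessionSummaryPromptData(
    columns=["event", "timestamp", "$current_url", "$window_id"],
    results=[
        ["$pageview", start, "https://example.com/home", "0192-abc"],
        ["$pageview", start + timedelta(seconds=10), "https://example.com/home", "0192-abc"],
        ["$autocapture", start + timedelta(seconds=12), "https://example.com/pricing", "0192-abc"],
    ],
)

data = simplify_window_id(data)           # "$window_id" values -> 1, 2, ...
data = deduplicate_urls(data)             # URLs -> "url_1", "url_2"; mapping kept on data.url_mapping
data = format_dates(data, start=start)    # "timestamp" column -> "milliseconds_since_start"
data = collapse_sequence_of_events(data)  # consecutive identical events -> one row + repetition count

# With the implementation above, the two consecutive $pageview rows collapse
# into a single row with event_repetition_count == 2, and the columns end up as
# ['event', '$current_url', '$window_id', 'milliseconds_since_start', 'event_repetition_count'].
print(data.columns)
print(data.results)
print(data.url_mapping)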
b/ee/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_query.ambr deleted file mode 100644 index e3c36bc703..0000000000 --- a/ee/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_query.ambr +++ /dev/null @@ -1,1649 +0,0 @@ -# serializer version: 1 -# name: TestClickhouseSessionRecordingsListFromQuery.test_effect_of_poe_settings_on_query_generated_0_test_poe_v1_still_falls_back_to_person_subquery - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, %(hogql_val_0)s)) AS start_time, - max(toTimeZone(s.max_last_timestamp, %(hogql_val_1)s)) AS end_time, - dateDiff(%(hogql_val_2)s, start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, %(hogql_val_3)s)), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff(%(hogql_val_4)s, start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, %(hogql_val_5)s), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, %(hogql_val_6)s), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, %(hogql_val_7)s), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT events.`$session_id` AS session_id - FROM events - WHERE and(equals(events.team_id, 99999), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, %(hogql_val_8)s), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, %(hogql_val_9)s), now64(6, %(hogql_val_10)s)), greaterOrEquals(toTimeZone(events.timestamp, %(hogql_val_11)s), toDateTime64('2020-12-28 23:58:00.000000', 6, 'UTC')), ifNull(equals(nullIf(nullIf(events.mat_pp_rgInternal, ''), 'null'), %(hogql_val_12)s), 0)) - GROUP BY events.`$session_id` - HAVING 1))) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 50000 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_effect_of_poe_settings_on_query_generated_1_test_poe_being_unavailable_we_fall_back_to_person_id_overrides - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, %(hogql_val_0)s)) AS start_time, - max(toTimeZone(s.max_last_timestamp, %(hogql_val_1)s)) AS end_time, - dateDiff(%(hogql_val_2)s, start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) 
AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, %(hogql_val_3)s)), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff(%(hogql_val_4)s, start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, %(hogql_val_5)s), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, %(hogql_val_6)s), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, %(hogql_val_7)s), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT events.`$session_id` AS session_id - FROM events - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_8)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___rgInternal, person.id AS id - FROM person - WHERE equals(person.team_id, 99999) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, %(hogql_val_9)s), person.version), plus(now64(6, %(hogql_val_10)s), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id) - WHERE and(equals(events.team_id, 99999), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, %(hogql_val_11)s), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, %(hogql_val_12)s), now64(6, %(hogql_val_13)s)), greaterOrEquals(toTimeZone(events.timestamp, %(hogql_val_14)s), toDateTime64('2020-12-28 23:58:00.000000', 6, 'UTC')), ifNull(equals(events__person.properties___rgInternal, %(hogql_val_15)s), 0)) - GROUP BY events.`$session_id` - HAVING 1))) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 50000 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_effect_of_poe_settings_on_query_generated_2_test_poe_being_unavailable_we_fall_back_to_person_subquery_but_still_use_mat_props - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, %(hogql_val_0)s)) AS start_time, - max(toTimeZone(s.max_last_timestamp, %(hogql_val_1)s)) AS 
end_time, - dateDiff(%(hogql_val_2)s, start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, %(hogql_val_3)s)), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff(%(hogql_val_4)s, start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, %(hogql_val_5)s), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, %(hogql_val_6)s), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, %(hogql_val_7)s), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT events.`$session_id` AS session_id - FROM events - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_8)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___rgInternal, person.id AS id - FROM person - WHERE equals(person.team_id, 99999) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, %(hogql_val_9)s), person.version), plus(now64(6, %(hogql_val_10)s), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id) - WHERE and(equals(events.team_id, 99999), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, %(hogql_val_11)s), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, %(hogql_val_12)s), now64(6, %(hogql_val_13)s)), greaterOrEquals(toTimeZone(events.timestamp, %(hogql_val_14)s), toDateTime64('2020-12-28 23:58:00.000000', 6, 'UTC')), ifNull(equals(events__person.properties___rgInternal, %(hogql_val_15)s), 0)) - GROUP BY events.`$session_id` - HAVING 1))) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 50000 - ''' -# --- -# name: 
TestClickhouseSessionRecordingsListFromQuery.test_effect_of_poe_settings_on_query_generated_3_test_allow_denormalised_props_fix_does_not_stop_all_poe_processing - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, %(hogql_val_0)s)) AS start_time, - max(toTimeZone(s.max_last_timestamp, %(hogql_val_1)s)) AS end_time, - dateDiff(%(hogql_val_2)s, start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, %(hogql_val_3)s)), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff(%(hogql_val_4)s, start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, %(hogql_val_5)s), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, %(hogql_val_6)s), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, %(hogql_val_7)s), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT events.`$session_id` AS session_id - FROM events - WHERE and(equals(events.team_id, 99999), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, %(hogql_val_8)s), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, %(hogql_val_9)s), now64(6, %(hogql_val_10)s)), greaterOrEquals(toTimeZone(events.timestamp, %(hogql_val_11)s), toDateTime64('2020-12-28 23:58:00.000000', 6, 'UTC')), ifNull(equals(nullIf(nullIf(events.mat_pp_rgInternal, ''), 'null'), %(hogql_val_12)s), 0)) - GROUP BY events.`$session_id` - HAVING 1))) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 50000 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_effect_of_poe_settings_on_query_generated_4_test_poe_v2_available_person_properties_are_used_in_replay_listing - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, %(hogql_val_0)s)) AS start_time, - max(toTimeZone(s.max_last_timestamp, %(hogql_val_1)s)) AS end_time, - dateDiff(%(hogql_val_2)s, start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 
%(hogql_val_3)s)), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff(%(hogql_val_4)s, start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, %(hogql_val_5)s), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, %(hogql_val_6)s), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, %(hogql_val_7)s), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT events.`$session_id` AS session_id - FROM events - WHERE and(equals(events.team_id, 99999), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, %(hogql_val_8)s), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, %(hogql_val_9)s), now64(6, %(hogql_val_10)s)), greaterOrEquals(toTimeZone(events.timestamp, %(hogql_val_11)s), toDateTime64('2020-12-28 23:58:00.000000', 6, 'UTC')), ifNull(equals(nullIf(nullIf(events.mat_pp_rgInternal, ''), 'null'), %(hogql_val_12)s), 0)) - GROUP BY events.`$session_id` - HAVING 1))) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 50000 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_00_poe_v2_and_materialized_columns_allowed_with_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - 
max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_00_poe_v2_and_materialized_columns_allowed_with_materialization.1 - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT events.`$session_id` AS session_id - FROM events - WHERE and(equals(events.team_id, 99999), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-28 23:58:00.000000', 6, 'UTC')), ifNull(equals(nullIf(nullIf(events.mat_pp_email, ''), 'null'), 'bla'), 0)) - GROUP BY events.`$session_id` - HAVING 1))) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_01_poe_v2_and_materialized_columns_allowed_without_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - 
sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_01_poe_v2_and_materialized_columns_allowed_without_materialization.1 - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT events.`$session_id` AS session_id - FROM events - WHERE and(equals(events.team_id, 99999), 
notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-28 23:58:00.000000', 6, 'UTC')), ifNull(equals(nullIf(nullIf(events.mat_pp_email, ''), 'null'), 'bla'), 0)) - GROUP BY events.`$session_id` - HAVING 1))) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_02_poe_v2_and_materialized_columns_off_with_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_02_poe_v2_and_materialized_columns_off_with_materialization.1 - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS 
mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT events.`$session_id` AS session_id - FROM events - WHERE and(equals(events.team_id, 99999), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-28 23:58:00.000000', 6, 'UTC')), ifNull(equals(nullIf(nullIf(events.mat_pp_email, ''), 'null'), 'bla'), 0)) - GROUP BY events.`$session_id` - HAVING 1))) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_03_poe_v2_and_materialized_columns_off_without_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE 
and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_03_poe_v2_and_materialized_columns_off_without_materialization.1 - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT events.`$session_id` AS session_id - FROM events - WHERE and(equals(events.team_id, 99999), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-28 23:58:00.000000', 6, 'UTC')), ifNull(equals(nullIf(nullIf(events.mat_pp_email, ''), 'null'), 'bla'), 0)) - GROUP BY events.`$session_id` - HAVING 1))) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: 
TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_04_poe_off_and_materialized_columns_allowed_with_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_04_poe_off_and_materialized_columns_allowed_with_materialization.1 - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM 
session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT events.`$session_id` AS session_id - FROM events - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) - LEFT JOIN - (SELECT person.id AS id, nullIf(nullIf(person.pmat_email, ''), 'null') AS properties___email - FROM person - WHERE and(equals(person.team_id, 99999), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 99999) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id) - WHERE and(equals(events.team_id, 99999), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-28 23:58:00.000000', 6, 'UTC')), ifNull(equals(events__person.properties___email, 'bla'), 0)) - GROUP BY events.`$session_id` - HAVING 1))) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_05_poe_off_and_materialized_columns_allowed_without_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - 
ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_05_poe_off_and_materialized_columns_allowed_without_materialization.1 - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT events.`$session_id` AS session_id - FROM events - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING 
ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) - LEFT JOIN - (SELECT person.id AS id, nullIf(nullIf(person.pmat_email, ''), 'null') AS properties___email - FROM person - WHERE and(equals(person.team_id, 99999), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 99999) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id) - WHERE and(equals(events.team_id, 99999), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-28 23:58:00.000000', 6, 'UTC')), ifNull(equals(events__person.properties___email, 'bla'), 0)) - GROUP BY events.`$session_id` - HAVING 1))) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_06_poe_off_and_materialized_columns_not_allowed_with_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), 
toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_06_poe_off_and_materialized_columns_not_allowed_with_materialization.1 - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT events.`$session_id` AS session_id - FROM events - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) - LEFT JOIN - (SELECT person.id AS id, nullIf(nullIf(person.pmat_email, ''), 'null') AS properties___email - FROM person - WHERE and(equals(person.team_id, 99999), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 99999) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), 
events__override.person_id, events.person_id), events__person.id) - WHERE and(equals(events.team_id, 99999), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-28 23:58:00.000000', 6, 'UTC')), ifNull(equals(events__person.properties___email, 'bla'), 0)) - GROUP BY events.`$session_id` - HAVING 1))) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_07_poe_off_and_materialized_columns_not_allowed_without_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_07_poe_off_and_materialized_columns_not_allowed_without_materialization.1 - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - 
sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT events.`$session_id` AS session_id - FROM events - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) - LEFT JOIN - (SELECT person.id AS id, nullIf(nullIf(person.pmat_email, ''), 'null') AS properties___email - FROM person - WHERE and(equals(person.team_id, 99999), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 99999) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__person ON equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), events__person.id) - WHERE and(equals(events.team_id, 99999), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-28 23:58:00.000000', 6, 'UTC')), ifNull(equals(events__person.properties___email, 'bla'), 0)) - GROUP BY events.`$session_id` - HAVING 1))) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: 
TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_08_poe_v1_and_materialized_columns_allowed_with_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_08_poe_v1_and_materialized_columns_allowed_with_materialization.1 - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM 
session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT events.`$session_id` AS session_id - FROM events - WHERE and(equals(events.team_id, 99999), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-28 23:58:00.000000', 6, 'UTC')), ifNull(equals(nullIf(nullIf(events.mat_pp_email, ''), 'null'), 'bla'), 0)) - GROUP BY events.`$session_id` - HAVING 1))) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_09_poe_v1_and_materialized_columns_allowed_without_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: 
TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_09_poe_v1_and_materialized_columns_allowed_without_materialization.1 - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT events.`$session_id` AS session_id - FROM events - WHERE and(equals(events.team_id, 99999), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-28 23:58:00.000000', 6, 'UTC')), ifNull(equals(nullIf(nullIf(events.mat_pp_email, ''), 'null'), 'bla'), 0)) - GROUP BY events.`$session_id` - HAVING 1))) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_10_poe_v1_and_not_materialized_columns_not_allowed_with_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS 
console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_10_poe_v1_and_not_materialized_columns_not_allowed_with_materialization.1 - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT events.`$session_id` AS session_id - FROM events - WHERE and(equals(events.team_id, 99999), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), 
greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-28 23:58:00.000000', 6, 'UTC')), ifNull(equals(nullIf(nullIf(events.mat_pp_email, ''), 'null'), 'bla'), 0)) - GROUP BY events.`$session_id` - HAVING 1))) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_11_poe_v1_and_not_materialized_columns_not_allowed_without_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_event_filter_with_person_properties_materialized_11_poe_v1_and_not_materialized_columns_not_allowed_without_materialization.1 - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - 
sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT events.`$session_id` AS session_id - FROM events - WHERE and(equals(events.team_id, 99999), notEmpty(events.`$session_id`), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), now64(6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-28 23:58:00.000000', 6, 'UTC')), ifNull(equals(nullIf(nullIf(events.mat_pp_email, ''), 'null'), 'bla'), 0)) - GROUP BY events.`$session_id` - HAVING 1))) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_person_id_filter_00_poe_v2_and_materialized_columns_allowed_with_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT DISTINCT events.`$session_id` AS 
`$session_id` - FROM events - WHERE and(equals(events.team_id, 99999), equals(events.person_id, '00000000-0000-0000-0000-000000000000'), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), notEmpty(events.`$session_id`)))), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_person_id_filter_01_poe_v2_and_materialized_columns_allowed_without_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT DISTINCT events.`$session_id` AS `$session_id` - FROM events - WHERE and(equals(events.team_id, 99999), equals(events.person_id, '00000000-0000-0000-0000-000000000000'), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), notEmpty(events.`$session_id`)))), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), 
ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_person_id_filter_02_poe_v2_and_materialized_columns_off_with_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT DISTINCT events.`$session_id` AS `$session_id` - FROM events - WHERE and(equals(events.team_id, 99999), equals(events.person_id, '00000000-0000-0000-0000-000000000000'), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), notEmpty(events.`$session_id`)))), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_person_id_filter_03_poe_v2_and_materialized_columns_off_without_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - 
max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT DISTINCT events.`$session_id` AS `$session_id` - FROM events - WHERE and(equals(events.team_id, 99999), equals(events.person_id, '00000000-0000-0000-0000-000000000000'), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), notEmpty(events.`$session_id`)))), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_person_id_filter_04_poe_off_and_materialized_columns_allowed_with_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - 
round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT DISTINCT events.`$session_id` AS `$session_id` - FROM events - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) - WHERE and(equals(events.team_id, 99999), ifNull(equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), '00000000-0000-0000-0000-000000000000'), 0), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), notEmpty(events.`$session_id`)))), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_person_id_filter_05_poe_off_and_materialized_columns_allowed_without_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), 
sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT DISTINCT events.`$session_id` AS `$session_id` - FROM events - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) - WHERE and(equals(events.team_id, 99999), ifNull(equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), '00000000-0000-0000-0000-000000000000'), 0), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), notEmpty(events.`$session_id`)))), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_person_id_filter_06_poe_off_and_materialized_columns_not_allowed_with_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), 
plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT DISTINCT events.`$session_id` AS `$session_id` - FROM events - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) - WHERE and(equals(events.team_id, 99999), ifNull(equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), '00000000-0000-0000-0000-000000000000'), 0), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), notEmpty(events.`$session_id`)))), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_person_id_filter_07_poe_off_and_materialized_columns_not_allowed_without_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), 
sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT DISTINCT events.`$session_id` AS `$session_id` - FROM events - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) - WHERE and(equals(events.team_id, 99999), ifNull(equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), '00000000-0000-0000-0000-000000000000'), 0), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), notEmpty(events.`$session_id`)))), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_person_id_filter_08_poe_v1_and_materialized_columns_allowed_with_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS 
activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT DISTINCT events.`$session_id` AS `$session_id` - FROM events - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) - WHERE and(equals(events.team_id, 99999), ifNull(equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), '00000000-0000-0000-0000-000000000000'), 0), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), notEmpty(events.`$session_id`)))), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_person_id_filter_09_poe_v1_and_materialized_columns_allowed_without_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), 
ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT DISTINCT events.`$session_id` AS `$session_id` - FROM events - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) - WHERE and(equals(events.team_id, 99999), ifNull(equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), '00000000-0000-0000-0000-000000000000'), 0), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), notEmpty(events.`$session_id`)))), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_person_id_filter_10_poe_v1_and_not_materialized_columns_not_allowed_with_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 
13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT DISTINCT events.`$session_id` AS `$session_id` - FROM events - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) - WHERE and(equals(events.team_id, 99999), ifNull(equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), '00000000-0000-0000-0000-000000000000'), 0), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), notEmpty(events.`$session_id`)))), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- -# name: TestClickhouseSessionRecordingsListFromQuery.test_person_id_filter_11_poe_v1_and_not_materialized_columns_not_allowed_without_materialization - ''' - SELECT s.session_id AS session_id, - any(s.team_id), - any(s.distinct_id), - min(toTimeZone(s.min_first_timestamp, 'UTC')) AS start_time, - max(toTimeZone(s.max_last_timestamp, 'UTC')) AS end_time, - dateDiff('SECOND', start_time, end_time) AS duration, - argMinMerge(s.first_url) AS first_url, - sum(s.click_count) AS click_count, - sum(s.keypress_count) AS keypress_count, - sum(s.mouse_activity_count) AS mouse_activity_count, - divide(sum(s.active_milliseconds), 1000) AS active_seconds, - minus(duration, active_seconds) AS inactive_seconds, - sum(s.console_log_count) AS console_log_count, - sum(s.console_warn_count) AS console_warn_count, - sum(s.console_error_count) AS console_error_count, - ifNull(greaterOrEquals(max(toTimeZone(s._timestamp, 'UTC')), toDateTime64('2021-01-01 13:41:23.000000', 6, 'UTC')), 0) AS ongoing, - round(multiply(divide(plus(plus(plus(divide(sum(s.active_milliseconds), 1000), sum(s.click_count)), sum(s.keypress_count)), sum(s.console_error_count)), plus(plus(plus(plus(sum(s.mouse_activity_count), dateDiff('SECOND', start_time, end_time)), sum(s.console_error_count)), sum(s.console_log_count)), sum(s.console_warn_count))), 100), 2) AS activity_score - FROM session_replay_events AS s - WHERE and(equals(s.team_id, 99999), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), globalIn(s.session_id, - (SELECT DISTINCT 
events.`$session_id` AS `$session_id` - FROM events - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 99999) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) - WHERE and(equals(events.team_id, 99999), ifNull(equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), '00000000-0000-0000-0000-000000000000'), 0), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), notEmpty(events.`$session_id`)))), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-29 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0)) - GROUP BY s.session_id - HAVING 1 - ORDER BY start_time DESC - LIMIT 51 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=0 - ''' -# --- diff --git a/ee/session_recordings/queries/test/test_session_recording_list_from_query.py b/ee/session_recordings/queries/test/test_session_recording_list_from_query.py deleted file mode 100644 index 3893af827e..0000000000 --- a/ee/session_recordings/queries/test/test_session_recording_list_from_query.py +++ /dev/null @@ -1,347 +0,0 @@ -import re -from itertools import product -from uuid import uuid4 - -from dateutil.relativedelta import relativedelta -from django.utils.timezone import now -from freezegun import freeze_time -from parameterized import parameterized - -from ee.clickhouse.materialized_columns.columns import materialize -from posthog.clickhouse.client import sync_execute -from posthog.hogql.ast import CompareOperation, And, SelectQuery -from posthog.hogql.base import Expr -from posthog.hogql.context import HogQLContext -from posthog.hogql.printer import print_ast -from posthog.models import Person -from posthog.schema import PersonsOnEventsMode, RecordingsQuery -from posthog.session_recordings.queries.session_recording_list_from_query import SessionRecordingListFromQuery -from posthog.session_recordings.queries.test.session_replay_sql import produce_replay_summary -from posthog.session_recordings.sql.session_replay_event_sql import TRUNCATE_SESSION_REPLAY_EVENTS_TABLE_SQL -from posthog.test.base import ( - APIBaseTest, - ClickhouseTestMixin, - QueryMatchingTest, - snapshot_clickhouse_queries, - _create_event, -) - - -# The HogQL pair of TestClickhouseSessionRecordingsListFromSessionReplay can be renamed when delete the old one -@freeze_time("2021-01-01T13:46:23") -class TestClickhouseSessionRecordingsListFromQuery(ClickhouseTestMixin, 
APIBaseTest, QueryMatchingTest): - def _print_query(self, query: SelectQuery) -> str: - return print_ast( - query, - HogQLContext(team_id=self.team.pk, enable_select_queries=True), - "clickhouse", - pretty=True, - ) - - def tearDown(self) -> None: - sync_execute(TRUNCATE_SESSION_REPLAY_EVENTS_TABLE_SQL()) - - @property - def base_time(self): - return (now() - relativedelta(hours=1)).replace(microsecond=0, second=0) - - def create_event( - self, - distinct_id, - timestamp, - team=None, - event_name="$pageview", - properties=None, - ): - if team is None: - team = self.team - if properties is None: - properties = {"$os": "Windows 95", "$current_url": "aloha.com/2"} - return _create_event( - team=team, - event=event_name, - timestamp=timestamp, - distinct_id=distinct_id, - properties=properties, - ) - - @parameterized.expand( - [ - [ - "test_poe_v1_still_falls_back_to_person_subquery", - True, - False, - False, - PersonsOnEventsMode.PERSON_ID_NO_OVERRIDE_PROPERTIES_ON_EVENTS, - ], - [ - "test_poe_being_unavailable_we_fall_back_to_person_id_overrides", - False, - False, - False, - PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_JOINED, - ], - [ - "test_poe_being_unavailable_we_fall_back_to_person_subquery_but_still_use_mat_props", - False, - False, - False, - PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_JOINED, - ], - [ - "test_allow_denormalised_props_fix_does_not_stop_all_poe_processing", - False, - True, - False, - PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_ON_EVENTS, - ], - [ - "test_poe_v2_available_person_properties_are_used_in_replay_listing", - False, - True, - True, - PersonsOnEventsMode.PERSON_ID_OVERRIDE_PROPERTIES_ON_EVENTS, - ], - ] - ) - def test_effect_of_poe_settings_on_query_generated( - self, - _name: str, - poe_v1: bool, - poe_v2: bool, - allow_denormalized_props: bool, - expected_poe_mode: PersonsOnEventsMode, - ) -> None: - with self.settings( - PERSON_ON_EVENTS_OVERRIDE=poe_v1, - PERSON_ON_EVENTS_V2_OVERRIDE=poe_v2, - ALLOW_DENORMALIZED_PROPS_IN_LISTING=allow_denormalized_props, - ): - assert self.team.person_on_events_mode == expected_poe_mode - materialize("events", "rgInternal", table_column="person_properties") - - query = RecordingsQuery.model_validate( - { - "properties": [ - { - "key": "rgInternal", - "value": ["false"], - "operator": "exact", - "type": "person", - } - ] - }, - ) - session_recording_list_instance = SessionRecordingListFromQuery( - query=query, team=self.team, hogql_query_modifiers=None - ) - - hogql_parsed_select = session_recording_list_instance.get_query() - printed_query = self._print_query(hogql_parsed_select) - - person_filtering_expr = self._matching_person_filter_expr_from(hogql_parsed_select) - - self._assert_is_events_person_filter(person_filtering_expr) - - if poe_v1 or poe_v2: - # Property used directly from event (from materialized column) - assert "ifNull(equals(nullIf(nullIf(events.mat_pp_rgInternal, ''), 'null')" in printed_query - else: - # We get the person property value from the persons JOIN - assert re.search( - r"argMax\(replaceRegexpAll\(nullIf\(nullIf\(JSONExtractRaw\(person\.properties, %\(hogql_val_\d+\)s\), ''\), 'null'\), '^\"|\"\$', ''\), person\.version\) AS properties___rgInternal", - printed_query, - ) - # Then we actually filter on that property value - assert re.search( - r"ifNull\(equals\(events__person\.properties___rgInternal, %\(hogql_val_\d+\)s\), 0\)", - printed_query, - ) - self.assertQueryMatchesSnapshot(printed_query) - - def _assert_is_pdi_filter(self, person_filtering_expr: list[Expr]) -> 
None: - assert person_filtering_expr[0].right.select_from.table.chain == ["person_distinct_ids"] - assert person_filtering_expr[0].right.where.left.chain == ["person", "properties", "rgInternal"] - - def _assert_is_events_person_filter(self, person_filtering_expr: list[Expr]) -> None: - assert person_filtering_expr[0].right.select_from.table.chain == ["events"] - event_person_condition = [ - x - for x in person_filtering_expr[0].right.where.exprs - if isinstance(x, CompareOperation) and x.left.chain == ["person", "properties", "rgInternal"] - ] - assert len(event_person_condition) == 1 - - def _matching_person_filter_expr_from(self, hogql_parsed_select: SelectQuery) -> list[Expr]: - where_conditions: list[Expr] = hogql_parsed_select.where.exprs - ands = [x for x in where_conditions if isinstance(x, And)] - assert len(ands) == 1 - and_comparisons = [x for x in ands[0].exprs if isinstance(x, CompareOperation)] - assert len(and_comparisons) == 1 - assert isinstance(and_comparisons[0].right, SelectQuery) - return and_comparisons - - settings_combinations = [ - ["poe v2 and materialized columns allowed", False, True, True], - ["poe v2 and materialized columns off", False, True, False], - ["poe off and materialized columns allowed", False, False, True], - ["poe off and materialized columns not allowed", False, False, False], - ["poe v1 and materialized columns allowed", True, False, True], - ["poe v1 and not materialized columns not allowed", True, False, False], - ] - - # Options for "materialize person columns" - materialization_options = [ - [" with materialization", True], - [" without materialization", False], - ] - - # Expand the parameter list to the product of all combinations with "materialize person columns" - # e.g. [a, b] x [c, d] = [a, c], [a, d], [b, c], [b, d] - test_case_combinations = [ - [f"{name}{mat_option}", poe_v1, poe, mat_columns, mat_person] - for (name, poe_v1, poe, mat_columns), (mat_option, mat_person) in product( - settings_combinations, materialization_options - ) - ] - - @parameterized.expand(test_case_combinations) - @snapshot_clickhouse_queries - def test_event_filter_with_person_properties_materialized( - self, - _name: str, - poe1_enabled: bool, - poe2_enabled: bool, - allow_denormalised_props: bool, - materialize_person_props: bool, - ) -> None: - # KLUDGE: I couldn't figure out how to use @also_test_with_materialized_columns(person_properties=["email"]) - # KLUDGE: and the parameterized.expand decorator at the same time, so we generate test case combos - # KLUDGE: for materialization on and off to test both sides the way the decorator would have - if materialize_person_props: - materialize("events", "email", table_column="person_properties") - materialize("person", "email") - - with self.settings( - PERSON_ON_EVENTS_OVERRIDE=poe1_enabled, - PERSON_ON_EVENTS_V2_OVERRIDE=poe2_enabled, - ALLOW_DENORMALIZED_PROPS_IN_LISTING=allow_denormalised_props, - ): - user_one = "test_event_filter_with_person_properties-user" - user_two = "test_event_filter_with_person_properties-user2" - session_id_one = f"test_event_filter_with_person_properties-1-{str(uuid4())}" - session_id_two = f"test_event_filter_with_person_properties-2-{str(uuid4())}" - - Person.objects.create(team=self.team, distinct_ids=[user_one], properties={"email": "bla"}) - Person.objects.create(team=self.team, distinct_ids=[user_two], properties={"email": "bla2"}) - - self._add_replay_with_pageview(session_id_one, user_one) - produce_replay_summary( - distinct_id=user_one, - session_id=session_id_one, - 
first_timestamp=(self.base_time + relativedelta(seconds=30)), - team_id=self.team.id, - ) - self._add_replay_with_pageview(session_id_two, user_two) - produce_replay_summary( - distinct_id=user_two, - session_id=session_id_two, - first_timestamp=(self.base_time + relativedelta(seconds=30)), - team_id=self.team.id, - ) - - match_everyone_filter = RecordingsQuery.model_validate( - {"properties": []}, - ) - - session_recording_list_instance = SessionRecordingListFromQuery( - query=match_everyone_filter, team=self.team, hogql_query_modifiers=None - ) - (session_recordings, _, _) = session_recording_list_instance.run() - - assert sorted([x["session_id"] for x in session_recordings]) == sorted([session_id_one, session_id_two]) - - match_bla_filter = RecordingsQuery.model_validate( - { - "properties": [ - { - "key": "email", - "value": ["bla"], - "operator": "exact", - "type": "person", - } - ] - }, - ) - - session_recording_list_instance = SessionRecordingListFromQuery( - query=match_bla_filter, team=self.team, hogql_query_modifiers=None - ) - (session_recordings, _, _) = session_recording_list_instance.run() - - assert len(session_recordings) == 1 - assert session_recordings[0]["session_id"] == session_id_one - - def _add_replay_with_pageview(self, session_id: str, user: str) -> None: - self.create_event( - user, - self.base_time, - properties={"$session_id": session_id, "$window_id": str(uuid4())}, - ) - produce_replay_summary( - distinct_id=user, - session_id=session_id, - first_timestamp=self.base_time, - team_id=self.team.id, - ) - - @parameterized.expand(test_case_combinations) - @snapshot_clickhouse_queries - def test_person_id_filter( - self, - _name: str, - poe2_enabled: bool, - poe1_enabled: bool, - allow_denormalised_props: bool, - materialize_person_props: bool, - ) -> None: - # KLUDGE: I couldn't figure out how to use @also_test_with_materialized_columns(person_properties=["email"]) - # KLUDGE: and the parameterized.expand decorator at the same time, so we generate test case combos - # KLUDGE: for materialization on and off to test both sides the way the decorator would have - if materialize_person_props: - # it shouldn't matter to this test whether any column is materialized - # but let's keep the tests in this file similar so we flush out any unexpected interactions - materialize("events", "email", table_column="person_properties") - materialize("person", "email") - - with self.settings( - PERSON_ON_EVENTS_OVERRIDE=poe1_enabled, - PERSON_ON_EVENTS_V2_OVERRIDE=poe2_enabled, - ALLOW_DENORMALIZED_PROPS_IN_LISTING=allow_denormalised_props, - ): - three_user_ids = ["person-1-distinct-1", "person-1-distinct-2", "person-2"] - session_id_one = f"test_person_id_filter-session-one" - session_id_two = f"test_person_id_filter-session-two" - session_id_three = f"test_person_id_filter-session-three" - - p = Person.objects.create( - team=self.team, - distinct_ids=[three_user_ids[0], three_user_ids[1]], - properties={"email": "bla"}, - ) - Person.objects.create( - team=self.team, - distinct_ids=[three_user_ids[2]], - properties={"email": "bla2"}, - ) - - self._add_replay_with_pageview(session_id_one, three_user_ids[0]) - self._add_replay_with_pageview(session_id_two, three_user_ids[1]) - self._add_replay_with_pageview(session_id_three, three_user_ids[2]) - - query = RecordingsQuery.model_validate({"person_uuid": str(p.uuid)}) - session_recording_list_instance = SessionRecordingListFromQuery( - query=query, team=self.team, hogql_query_modifiers=None - ) - (session_recordings, _, _) = 
session_recording_list_instance.run() - assert sorted([r["session_id"] for r in session_recordings]) == sorted([session_id_two, session_id_one]) diff --git a/ee/session_recordings/session_recording_extensions.py b/ee/session_recordings/session_recording_extensions.py deleted file mode 100644 index b14397717f..0000000000 --- a/ee/session_recordings/session_recording_extensions.py +++ /dev/null @@ -1,97 +0,0 @@ -# EE extended functions for SessionRecording model -from datetime import timedelta - -import structlog -from django.utils import timezone -from prometheus_client import Histogram, Counter - -from posthog import settings -from posthog.session_recordings.models.session_recording import SessionRecording -from posthog.storage import object_storage - -logger = structlog.get_logger(__name__) - -SNAPSHOT_PERSIST_TIME_HISTOGRAM = Histogram( - "snapshot_persist_time_seconds", - "We persist recording snapshots from S3, how long does that take?", -) - -SNAPSHOT_PERSIST_SUCCESS_COUNTER = Counter( - "snapshot_persist_success", - "Count of session recordings that were successfully persisted", -) - -SNAPSHOT_PERSIST_FAILURE_COUNTER = Counter( - "snapshot_persist_failure", - "Count of session recordings that failed to be persisted", -) - -SNAPSHOT_PERSIST_TOO_YOUNG_COUNTER = Counter( - "snapshot_persist_too_young", - "Count of session recordings that were too young to be persisted", -) - -RECORDING_PERSIST_START_COUNTER = Counter( - "recording_persist_started", - "Count of session recordings that were persisted", -) - -MINIMUM_AGE_FOR_RECORDING = timedelta(hours=24) - - -class InvalidRecordingForPersisting(Exception): - pass - - -def persist_recording(recording_id: str, team_id: int) -> None: - """Persist a recording to the S3""" - - if not settings.OBJECT_STORAGE_ENABLED: - return - - recording = SessionRecording.objects.select_related("team").get(session_id=recording_id, team_id=team_id) - - if not recording: - raise Exception(f"Recording {recording_id} not found") - - if recording.deleted: - logger.info( - "Persisting recording: skipping as recording is deleted", - recording_id=recording_id, - team_id=team_id, - ) - return - - RECORDING_PERSIST_START_COUNTER.inc() - - recording.load_metadata() - - if not recording.start_time or timezone.now() < recording.start_time + MINIMUM_AGE_FOR_RECORDING: - # Recording is too recent to be persisted. - # We can save the metadata as it is still useful for querying, but we can't move to S3 yet. 
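# A minimal sketch of the age gate described in the comment above, assuming
# `start_time` is a timezone-aware datetime as loaded by `load_metadata()`;
# `is_old_enough_to_persist` is a hypothetical helper name used only for
# illustration of the 24-hour rule.
from datetime import datetime, timedelta
from typing import Optional

from django.utils import timezone

MINIMUM_AGE_FOR_RECORDING = timedelta(hours=24)

def is_old_enough_to_persist(start_time: Optional[datetime]) -> bool:
    # Recordings younger than 24 hours keep their saved metadata but are not
    # yet copied to long-term (LTS) object storage.
    if start_time is None:
        return False
    return timezone.now() >= start_time + MINIMUM_AGE_FOR_RECORDING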
- SNAPSHOT_PERSIST_TOO_YOUNG_COUNTER.inc() - recording.save() - return - - target_prefix = recording.build_blob_lts_storage_path("2023-08-01") - source_prefix = recording.build_blob_ingestion_storage_path() - # if snapshots are already in blob storage, then we can just copy the files between buckets - with SNAPSHOT_PERSIST_TIME_HISTOGRAM.time(): - copied_count = object_storage.copy_objects(source_prefix, target_prefix) - - if copied_count > 0: - recording.storage_version = "2023-08-01" - recording.object_storage_path = target_prefix - recording.save() - SNAPSHOT_PERSIST_SUCCESS_COUNTER.inc() - return - else: - SNAPSHOT_PERSIST_FAILURE_COUNTER.inc() - logger.error( - "No snapshots found to copy in S3 when persisting a recording", - recording_id=recording_id, - team_id=team_id, - target_prefix=target_prefix, - source_prefix=source_prefix, - ) - raise InvalidRecordingForPersisting("Could not persist recording: " + recording_id) diff --git a/ee/session_recordings/session_recording_playlist.py b/ee/session_recordings/session_recording_playlist.py deleted file mode 100644 index c95c274e89..0000000000 --- a/ee/session_recordings/session_recording_playlist.py +++ /dev/null @@ -1,261 +0,0 @@ -from typing import Any, Optional - -import structlog -from django.db.models import Q, QuerySet -from django.utils.timezone import now -from django_filters.rest_framework import DjangoFilterBackend -from loginas.utils import is_impersonated_session -from rest_framework import request, response, serializers, viewsets -from posthog.api.utils import action - -from posthog.api.forbid_destroy_model import ForbidDestroyModel -from posthog.api.routing import TeamAndOrgViewSetMixin -from posthog.api.shared import UserBasicSerializer -from posthog.models import ( - SessionRecording, - SessionRecordingPlaylist, - SessionRecordingPlaylistItem, - User, -) -from posthog.models.activity_logging.activity_log import ( - Change, - Detail, - changes_between, - log_activity, -) -from posthog.models.utils import UUIDT -from posthog.rate_limit import ( - ClickHouseBurstRateThrottle, - ClickHouseSustainedRateThrottle, -) -from posthog.schema import RecordingsQuery -from posthog.session_recordings.session_recording_api import ( - list_recordings_response, - query_as_params_to_dict, - list_recordings_from_query, -) -from posthog.utils import relative_date_parse - -logger = structlog.get_logger(__name__) - - -def log_playlist_activity( - activity: str, - playlist: SessionRecordingPlaylist, - playlist_id: int, - playlist_short_id: str, - organization_id: UUIDT, - team_id: int, - user: User, - was_impersonated: bool, - changes: Optional[list[Change]] = None, -) -> None: - """ - Insight id and short_id are passed separately as some activities (like delete) alter the Insight instance - - The experiments feature creates insights without a name, this does not log those - """ - - playlist_name: Optional[str] = playlist.name if playlist.name else playlist.derived_name - if playlist_name: - log_activity( - organization_id=organization_id, - team_id=team_id, - user=user, - was_impersonated=was_impersonated, - item_id=playlist_id, - scope="SessionRecordingPlaylist", - activity=activity, - detail=Detail(name=playlist_name, changes=changes, short_id=playlist_short_id), - ) - - -class SessionRecordingPlaylistSerializer(serializers.ModelSerializer): - class Meta: - model = SessionRecordingPlaylist - fields = [ - "id", - "short_id", - "name", - "derived_name", - "description", - "pinned", - "created_at", - "created_by", - "deleted", - "filters", - 
"last_modified_at", - "last_modified_by", - ] - read_only_fields = [ - "id", - "short_id", - "team", - "created_at", - "created_by", - "last_modified_at", - "last_modified_by", - ] - - created_by = UserBasicSerializer(read_only=True) - last_modified_by = UserBasicSerializer(read_only=True) - - def create(self, validated_data: dict, *args, **kwargs) -> SessionRecordingPlaylist: - request = self.context["request"] - team = self.context["get_team"]() - - created_by = validated_data.pop("created_by", request.user) - playlist = SessionRecordingPlaylist.objects.create( - team=team, - created_by=created_by, - last_modified_by=request.user, - **validated_data, - ) - - log_playlist_activity( - activity="created", - playlist=playlist, - playlist_id=playlist.id, - playlist_short_id=playlist.short_id, - organization_id=self.context["request"].user.current_organization_id, - team_id=team.id, - user=self.context["request"].user, - was_impersonated=is_impersonated_session(self.context["request"]), - ) - - return playlist - - def update(self, instance: SessionRecordingPlaylist, validated_data: dict, **kwargs) -> SessionRecordingPlaylist: - try: - before_update = SessionRecordingPlaylist.objects.get(pk=instance.id) - except SessionRecordingPlaylist.DoesNotExist: - before_update = None - - if validated_data.keys() & SessionRecordingPlaylist.MATERIAL_PLAYLIST_FIELDS: - instance.last_modified_at = now() - instance.last_modified_by = self.context["request"].user - - updated_playlist = super().update(instance, validated_data) - changes = changes_between("SessionRecordingPlaylist", previous=before_update, current=updated_playlist) - - log_playlist_activity( - activity="updated", - playlist=updated_playlist, - playlist_id=updated_playlist.id, - playlist_short_id=updated_playlist.short_id, - organization_id=self.context["request"].user.current_organization_id, - team_id=self.context["team_id"], - user=self.context["request"].user, - was_impersonated=is_impersonated_session(self.context["request"]), - changes=changes, - ) - - return updated_playlist - - -class SessionRecordingPlaylistViewSet(TeamAndOrgViewSetMixin, ForbidDestroyModel, viewsets.ModelViewSet): - scope_object = "session_recording_playlist" - queryset = SessionRecordingPlaylist.objects.all() - serializer_class = SessionRecordingPlaylistSerializer - throttle_classes = [ClickHouseBurstRateThrottle, ClickHouseSustainedRateThrottle] - filter_backends = [DjangoFilterBackend] - filterset_fields = ["short_id", "created_by"] - lookup_field = "short_id" - - def safely_get_queryset(self, queryset) -> QuerySet: - if not self.action.endswith("update"): - # Soft-deleted insights can be brought back with a PATCH request - queryset = queryset.filter(deleted=False) - - queryset = queryset.select_related("created_by", "last_modified_by", "team") - if self.action == "list": - queryset = queryset.filter(deleted=False) - queryset = self._filter_request(self.request, queryset) - - order = self.request.GET.get("order", None) - if order: - queryset = queryset.order_by(order) - else: - queryset = queryset.order_by("-last_modified_at") - - return queryset - - def _filter_request(self, request: request.Request, queryset: QuerySet) -> QuerySet: - filters = request.GET.dict() - - for key in filters: - if key == "user": - queryset = queryset.filter(created_by=request.user) - elif key == "pinned": - queryset = queryset.filter(pinned=True) - elif key == "date_from": - queryset = queryset.filter( - last_modified_at__gt=relative_date_parse(request.GET["date_from"], 
self.team.timezone_info) - ) - elif key == "date_to": - queryset = queryset.filter( - last_modified_at__lt=relative_date_parse(request.GET["date_to"], self.team.timezone_info) - ) - elif key == "search": - queryset = queryset.filter( - Q(name__icontains=request.GET["search"]) | Q(derived_name__icontains=request.GET["search"]) - ) - elif key == "session_recording_id": - queryset = queryset.filter(playlist_items__recording_id=request.GET["session_recording_id"]) - return queryset - - # As of now, you can only "update" a session recording by adding or removing a recording from a static playlist - @action(methods=["GET"], detail=True, url_path="recordings") - def recordings(self, request: request.Request, *args: Any, **kwargs: Any) -> response.Response: - playlist = self.get_object() - playlist_items = list( - SessionRecordingPlaylistItem.objects.filter(playlist=playlist) - .exclude(deleted=True) - .order_by("-created_at") - .values_list("recording_id", flat=True) - ) - - data_dict = query_as_params_to_dict(request.GET.dict()) - query = RecordingsQuery.model_validate(data_dict) - query.session_ids = playlist_items - return list_recordings_response( - list_recordings_from_query(query, request, context=self.get_serializer_context()) - ) - - # As of now, you can only "update" a session recording by adding or removing a recording from a static playlist - @action( - methods=["POST", "DELETE"], - detail=True, - url_path="recordings/(?P<session_recording_id>[^/.]+)", - ) - def modify_recordings( - self, - request: request.Request, - session_recording_id: str, - *args: Any, - **kwargs: Any, - ) -> response.Response: - playlist = self.get_object() - - # TODO: Maybe we need to save the created_at date here properly to help with filtering - if request.method == "POST": - recording, _ = SessionRecording.objects.get_or_create( - session_id=session_recording_id, - team=self.team, - defaults={"deleted": False}, - ) - playlist_item, created = SessionRecordingPlaylistItem.objects.get_or_create( - playlist=playlist, recording=recording - ) - - return response.Response({"success": True}) - - if request.method == "DELETE": - playlist_item = SessionRecordingPlaylistItem.objects.get(playlist=playlist, recording=session_recording_id) - - if playlist_item: - playlist_item.delete() - - return response.Response({"success": True}) - - raise NotImplementedError() diff --git a/ee/session_recordings/session_summary/summarize_session.py b/ee/session_recordings/session_summary/summarize_session.py deleted file mode 100644 index 536eb03477..0000000000 --- a/ee/session_recordings/session_summary/summarize_session.py +++ /dev/null @@ -1,144 +0,0 @@ -import openai - -from prometheus_client import Histogram - -from posthog.api.activity_log import ServerTimingsGathered -from posthog.models import User, Team -from posthog.session_recordings.models.session_recording import SessionRecording - -from posthog.session_recordings.queries.session_replay_events import SessionReplayEvents - -from posthog.utils import get_instance_region - -from ee.session_recordings.ai.utils import ( - SessionSummaryPromptData, - simplify_window_id, - deduplicate_urls, - format_dates, - collapse_sequence_of_events, -) - -TOKENS_IN_PROMPT_HISTOGRAM = Histogram( - "posthog_session_summary_tokens_in_prompt_histogram", - "histogram of the number of tokens in the prompt used to generate a session summary", - buckets=[ - 0, - 10, - 50, - 100, - 500, - 1000, - 2000, - 3000, - 4000, - 5000, - 6000, - 7000, - 8000, - 10000, - 20000, - 30000, - 40000, - 50000, - 
100000, - 128000, - float("inf"), - ], -) - - -def summarize_recording(recording: SessionRecording, user: User, team: Team): - timer = ServerTimingsGathered() - - with timer("get_metadata"): - session_metadata = SessionReplayEvents().get_metadata(session_id=str(recording.session_id), team=team) - if not session_metadata: - raise ValueError(f"no session metadata found for session_id {recording.session_id}") - - with timer("get_events"): - session_events = SessionReplayEvents().get_events( - session_id=str(recording.session_id), - team=team, - metadata=session_metadata, - events_to_ignore=[ - "$feature_flag_called", - ], - ) - if not session_events or not session_events[0] or not session_events[1]: - raise ValueError(f"no events found for session_id {recording.session_id}") - - # convert session_metadata to a Dict from a TypedDict - # so that we can amend its values freely - session_metadata_dict = dict(session_metadata) - - del session_metadata_dict["distinct_id"] - start_time = session_metadata["start_time"] - session_metadata_dict["start_time"] = start_time.isoformat() - session_metadata_dict["end_time"] = session_metadata["end_time"].isoformat() - - with timer("generate_prompt"): - prompt_data = deduplicate_urls( - collapse_sequence_of_events( - format_dates( - simplify_window_id(SessionSummaryPromptData(columns=session_events[0], results=session_events[1])), - start=start_time, - ) - ) - ) - - instance_region = get_instance_region() or "HOBBY" - - with timer("openai_completion"): - result = openai.chat.completions.create( - model="gpt-4o-mini", # allows 128k tokens - temperature=0.7, - messages=[ - { - "role": "system", - "content": """ - Session Replay is PostHog's tool to record visits to web sites and apps. - We also gather events that occur like mouse clicks and key presses. - You write two or three sentence concise and simple summaries of those sessions based on a prompt. - You are more likely to mention errors or things that look like business success such as checkout events. - You always try to make the summary actionable. E.g. mentioning what someone clicked on, or summarizing errors they experienced. - You don't help with other knowledge.""", - }, - { - "role": "user", - "content": f"""the session metadata I have is {session_metadata_dict}. - it gives an overview of activity and duration""", - }, - { - "role": "user", - "content": f""" - URLs associated with the events can be found in this mapping {prompt_data.url_mapping}. You never refer to URLs by their placeholder. Always refer to the URL with the simplest version e.g. posthog.com or posthog.com/replay - """, - }, - { - "role": "user", - "content": f"""the session events I have are {prompt_data.results}. - with columns {prompt_data.columns}. - they give an idea of what happened and when, - if present the elements_chain_texts, elements_chain_elements, and elements_chain_href extracted from the html can aid in understanding what a user interacted with - but should not be directly used in your response""", - }, - { - "role": "user", - "content": """ - generate a two or three sentence summary of the session. - only summarize, don't offer advice. - use as concise and simple language as is possible. - Dont' refer to the session length unless it is notable for some reason. - assume a reading age of around 12 years old. 
- generate no text other than the summary.""", - }, - ], - user=f"{instance_region}/{user.pk}", # allows 8k tokens - ) - - usage = result.usage.prompt_tokens if result.usage else None - if usage: - TOKENS_IN_PROMPT_HISTOGRAM.observe(usage) - - content: str = result.choices[0].message.content or "" - return {"content": content, "timings": timer.get_all_timings()} diff --git a/ee/session_recordings/session_summary/test/test_summarize_session.py b/ee/session_recordings/session_summary/test/test_summarize_session.py deleted file mode 100644 index 3cc69df02b..0000000000 --- a/ee/session_recordings/session_summary/test/test_summarize_session.py +++ /dev/null @@ -1,116 +0,0 @@ -from datetime import datetime, UTC - -from dateutil.parser import isoparse - -from ee.session_recordings.session_summary.summarize_session import ( - format_dates, - simplify_window_id, - deduplicate_urls, - collapse_sequence_of_events, - SessionSummaryPromptData, -) -from posthog.test.base import BaseTest - - -class TestSummarizeSessions(BaseTest): - def test_format_dates_as_millis_since_start(self) -> None: - processed = format_dates( - SessionSummaryPromptData( - columns=["event", "timestamp"], - results=[ - ["$pageview", isoparse("2021-01-01T00:00:00Z")], - ["$pageview", isoparse("2021-01-01T00:00:01Z")], - ["$pageview", isoparse("2021-01-01T00:00:02Z")], - ], - ), - datetime(2021, 1, 1, 0, 0, 0, tzinfo=UTC), - ) - assert processed.columns == ["event", "milliseconds_since_start"] - assert processed.results == [["$pageview", 0], ["$pageview", 1000], ["$pageview", 2000]] - - def test_simplify_window_id(self) -> None: - processed = simplify_window_id( - SessionSummaryPromptData( - columns=["event", "timestamp", "$window_id"], - results=[ - ["$pageview-1-1", isoparse("2021-01-01T00:00:00Z"), "window-the-first"], - ["$pageview-1-2", isoparse("2021-01-01T00:00:01Z"), "window-the-first"], - ["$pageview-2-1", isoparse("2021-01-01T00:00:02Z"), "window-the-second"], - ["$pageview-4-1", isoparse("2021-01-01T00:00:02Z"), "window-the-fourth"], - ["$pageview-3-1", isoparse("2021-01-01T00:00:02Z"), "window-the-third"], - ["$pageview-1-3", isoparse("2021-01-01T00:00:02Z"), "window-the-first"], - ], - ) - ) - - assert processed.columns == ["event", "timestamp", "$window_id"] - assert processed.results == [ - ["$pageview-1-1", isoparse("2021-01-01T00:00:00Z"), 1], - ["$pageview-1-2", isoparse("2021-01-01T00:00:01Z"), 1], - ["$pageview-2-1", isoparse("2021-01-01T00:00:02Z"), 2], - # window the fourth has index 3... 
- # in reality these are mapping from UUIDs - # and this apparent switch of number wouldn't stand out - ["$pageview-4-1", isoparse("2021-01-01T00:00:02Z"), 3], - ["$pageview-3-1", isoparse("2021-01-01T00:00:02Z"), 4], - ["$pageview-1-3", isoparse("2021-01-01T00:00:02Z"), 1], - ] - - def test_collapse_sequence_of_events(self) -> None: - processed = collapse_sequence_of_events( - SessionSummaryPromptData( - columns=["event", "timestamp", "$window_id"], - results=[ - # these collapse because they're a sequence - ["$pageview", isoparse("2021-01-01T00:00:00Z"), 1], - ["$pageview", isoparse("2021-01-01T01:00:00Z"), 1], - ["$pageview", isoparse("2021-01-01T02:00:00Z"), 1], - ["$pageview", isoparse("2021-01-01T03:00:00Z"), 1], - # these don't collapse because they're different windows - ["$autocapture", isoparse("2021-01-01T00:00:00Z"), 1], - ["$autocapture", isoparse("2021-01-01T01:00:00Z"), 2], - # these don't collapse because they're not a sequence - ["$a", isoparse("2021-01-01T01:00:00Z"), 2], - ["$b", isoparse("2021-01-01T01:00:00Z"), 2], - ["$c", isoparse("2021-01-01T01:00:00Z"), 2], - ], - ) - ) - assert processed.columns == ["event", "timestamp", "$window_id", "event_repetition_count"] - assert processed.results == [ - ["$pageview", isoparse("2021-01-01T00:00:00Z"), 1, 4], - ["$autocapture", isoparse("2021-01-01T00:00:00Z"), 1, None], - ["$autocapture", isoparse("2021-01-01T01:00:00Z"), 2, None], - ["$a", isoparse("2021-01-01T01:00:00Z"), 2, None], - ["$b", isoparse("2021-01-01T01:00:00Z"), 2, None], - ["$c", isoparse("2021-01-01T01:00:00Z"), 2, None], - ] - - def test_deduplicate_ids(self) -> None: - processed = deduplicate_urls( - SessionSummaryPromptData( - columns=["event", "$current_url"], - results=[ - ["$pageview-one", "https://example.com/one"], - ["$pageview-two", "https://example.com/two"], - ["$pageview-one", "https://example.com/one"], - ["$pageview-one", "https://example.com/one"], - ["$pageview-two", "https://example.com/two"], - ["$pageview-three", "https://example.com/three"], - ], - ) - ) - assert processed.columns == ["event", "$current_url"] - assert processed.results == [ - ["$pageview-one", "url_1"], - ["$pageview-two", "url_2"], - ["$pageview-one", "url_1"], - ["$pageview-one", "url_1"], - ["$pageview-two", "url_2"], - ["$pageview-three", "url_3"], - ] - assert processed.url_mapping == { - "https://example.com/one": "url_1", - "https://example.com/two": "url_2", - "https://example.com/three": "url_3", - } diff --git a/ee/session_recordings/test/__init__.py b/ee/session_recordings/test/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/session_recordings/test/test_session_recording_extensions.py b/ee/session_recordings/test/test_session_recording_extensions.py deleted file mode 100644 index 04dccb2faa..0000000000 --- a/ee/session_recordings/test/test_session_recording_extensions.py +++ /dev/null @@ -1,134 +0,0 @@ -from datetime import timedelta, datetime, UTC -from secrets import token_urlsafe -from uuid import uuid4 - -from boto3 import resource -from botocore.config import Config -from freezegun import freeze_time - -from ee.session_recordings.session_recording_extensions import ( - persist_recording, -) -from posthog.session_recordings.models.session_recording import SessionRecording -from posthog.session_recordings.queries.test.session_replay_sql import ( - produce_replay_summary, -) -from posthog.settings import ( - OBJECT_STORAGE_ENDPOINT, - OBJECT_STORAGE_ACCESS_KEY_ID, - OBJECT_STORAGE_SECRET_ACCESS_KEY, - OBJECT_STORAGE_BUCKET, 
-) -from posthog.storage.object_storage import write, list_objects, object_storage_client -from posthog.test.base import APIBaseTest, ClickhouseTestMixin - -long_url = f"https://app.posthog.com/my-url?token={token_urlsafe(600)}" - - -TEST_BUCKET = "test_storage_bucket-TestSessionRecordingExtensions" - - -class TestSessionRecordingExtensions(ClickhouseTestMixin, APIBaseTest): - def teardown_method(self, method) -> None: - s3 = resource( - "s3", - endpoint_url=OBJECT_STORAGE_ENDPOINT, - aws_access_key_id=OBJECT_STORAGE_ACCESS_KEY_ID, - aws_secret_access_key=OBJECT_STORAGE_SECRET_ACCESS_KEY, - config=Config(signature_version="s3v4"), - region_name="us-east-1", - ) - bucket = s3.Bucket(OBJECT_STORAGE_BUCKET) - bucket.objects.filter(Prefix=TEST_BUCKET).delete() - - def test_does_not_persist_too_recent_recording(self): - recording = SessionRecording.objects.create( - team=self.team, - session_id=f"test_does_not_persist_too_recent_recording-s1-{uuid4()}", - ) - - produce_replay_summary( - team_id=self.team.pk, - session_id=recording.session_id, - distinct_id="distinct_id_1", - first_timestamp=recording.created_at, - last_timestamp=recording.created_at, - ) - persist_recording(recording.session_id, recording.team_id) - recording.refresh_from_db() - - assert not recording.object_storage_path - - def test_can_build_object_storage_paths(self) -> None: - produce_replay_summary( - session_id="test_can_build_different_object_storage_paths-s1", - team_id=self.team.pk, - ) - - recording: SessionRecording = SessionRecording.objects.create( - team=self.team, - session_id="test_can_build_different_object_storage_paths-s1", - ) - - assert ( - recording.build_blob_lts_storage_path("2023-08-01") - == f"session_recordings_lts/team_id/{self.team.pk}/session_id/test_can_build_different_object_storage_paths-s1/data" - ) - - def test_persists_recording_from_blob_ingested_storage(self): - with self.settings(OBJECT_STORAGE_SESSION_RECORDING_BLOB_INGESTION_FOLDER=TEST_BUCKET): - two_minutes_ago = (datetime.now() - timedelta(minutes=2)).replace(tzinfo=UTC) - - with freeze_time(two_minutes_ago): - session_id = f"test_persists_recording_from_blob_ingested_storage-s1-{uuid4()}" - - produce_replay_summary( - session_id=session_id, - team_id=self.team.pk, - first_timestamp=(two_minutes_ago - timedelta(hours=48)).isoformat(), - last_timestamp=(two_minutes_ago - timedelta(hours=46)).isoformat(), - distinct_id="distinct_id_1", - first_url="https://app.posthog.com/my-url", - ) - - # this recording already has several files stored from Mr. 
Blobby - # these need to be written before creating the recording object - blob_path = f"{TEST_BUCKET}/team_id/{self.team.pk}/session_id/{session_id}/data" - for file in ["a", "b", "c"]: - file_name = f"{blob_path}/{file}" - write(file_name, f"my content-{file}".encode()) - - assert object_storage_client().list_objects(OBJECT_STORAGE_BUCKET, blob_path) == [ - f"{blob_path}/a", - f"{blob_path}/b", - f"{blob_path}/c", - ] - - recording: SessionRecording = SessionRecording.objects.create(team=self.team, session_id=session_id) - - assert recording.created_at == two_minutes_ago - assert recording.storage_version is None - - persist_recording(recording.session_id, recording.team_id) - recording.refresh_from_db() - - assert ( - recording.object_storage_path - == f"session_recordings_lts/team_id/{self.team.pk}/session_id/{recording.session_id}/data" - ) - assert recording.start_time == recording.created_at - timedelta(hours=48) - assert recording.end_time == recording.created_at - timedelta(hours=46) - - assert recording.storage_version == "2023-08-01" - assert recording.distinct_id == "distinct_id_1" - assert recording.duration == 7200 - assert recording.click_count == 0 - assert recording.keypress_count == 0 - assert recording.start_url == "https://app.posthog.com/my-url" - - stored_objects = list_objects(recording.build_blob_lts_storage_path("2023-08-01")) - assert stored_objects == [ - f"{recording.build_blob_lts_storage_path('2023-08-01')}/a", - f"{recording.build_blob_lts_storage_path('2023-08-01')}/b", - f"{recording.build_blob_lts_storage_path('2023-08-01')}/c", - ] diff --git a/ee/session_recordings/test/test_session_recording_playlist.py b/ee/session_recordings/test/test_session_recording_playlist.py deleted file mode 100644 index 2d26d96aab..0000000000 --- a/ee/session_recordings/test/test_session_recording_playlist.py +++ /dev/null @@ -1,351 +0,0 @@ -from datetime import datetime, timedelta, UTC -from unittest import mock -from unittest.mock import MagicMock, patch -from uuid import uuid4 - -from boto3 import resource -from botocore.config import Config -from django.test import override_settings -from freezegun import freeze_time -from rest_framework import status - -from ee.api.test.base import APILicensedTest -from posthog.models import SessionRecording, SessionRecordingPlaylistItem -from posthog.models.user import User -from posthog.session_recordings.models.session_recording_playlist import ( - SessionRecordingPlaylist, -) -from posthog.session_recordings.queries.test.session_replay_sql import ( - produce_replay_summary, -) -from posthog.settings import ( - OBJECT_STORAGE_ACCESS_KEY_ID, - OBJECT_STORAGE_BUCKET, - OBJECT_STORAGE_ENDPOINT, - OBJECT_STORAGE_SECRET_ACCESS_KEY, -) - -TEST_BUCKET = "test_storage_bucket-ee.TestSessionRecordingPlaylist" - - -@override_settings( - OBJECT_STORAGE_SESSION_RECORDING_BLOB_INGESTION_FOLDER=TEST_BUCKET, - OBJECT_STORAGE_SESSION_RECORDING_LTS_FOLDER=f"{TEST_BUCKET}_lts", -) -class TestSessionRecordingPlaylist(APILicensedTest): - def teardown_method(self, method) -> None: - s3 = resource( - "s3", - endpoint_url=OBJECT_STORAGE_ENDPOINT, - aws_access_key_id=OBJECT_STORAGE_ACCESS_KEY_ID, - aws_secret_access_key=OBJECT_STORAGE_SECRET_ACCESS_KEY, - config=Config(signature_version="s3v4"), - region_name="us-east-1", - ) - bucket = s3.Bucket(OBJECT_STORAGE_BUCKET) - bucket.objects.filter(Prefix=TEST_BUCKET).delete() - - def test_list_playlists(self): - response = self.client.get(f"/api/projects/{self.team.id}/session_recording_playlists") - assert 
response.status_code == status.HTTP_200_OK - assert response.json() == { - "count": 0, - "next": None, - "previous": None, - "results": [], - } - - def test_creates_playlist(self): - response = self.client.post( - f"/api/projects/{self.team.id}/session_recording_playlists", - data={"name": "test"}, - ) - assert response.status_code == status.HTTP_201_CREATED - assert response.json() == { - "id": response.json()["id"], - "short_id": response.json()["short_id"], - "name": "test", - "derived_name": None, - "description": "", - "pinned": False, - "created_at": mock.ANY, - "created_by": response.json()["created_by"], - "deleted": False, - "filters": {}, - "last_modified_at": mock.ANY, - "last_modified_by": response.json()["last_modified_by"], - } - - def test_can_create_many_playlists(self): - for i in range(100): - response = self.client.post( - f"/api/projects/{self.team.id}/session_recording_playlists", - data={"name": f"test-{i}"}, - ) - assert response.status_code == status.HTTP_201_CREATED - - def test_gets_individual_playlist_by_shortid(self): - create_response = self.client.post(f"/api/projects/{self.team.id}/session_recording_playlists") - response = self.client.get( - f"/api/projects/{self.team.id}/session_recording_playlists/{create_response.json()['short_id']}" - ) - - assert response.json()["short_id"] == create_response.json()["short_id"] - - def test_updates_playlist(self): - short_id = self.client.post(f"/api/projects/{self.team.id}/session_recording_playlists/").json()["short_id"] - - with freeze_time("2022-01-02"): - response = self.client.patch( - f"/api/projects/{self.team.id}/session_recording_playlists/{short_id}", - { - "name": "changed name", - "description": "changed description", - "filters": {"events": [{"id": "test"}]}, - "pinned": True, - }, - ) - - assert response.json()["short_id"] == short_id - assert response.json()["name"] == "changed name" - assert response.json()["description"] == "changed description" - assert response.json()["filters"] == {"events": [{"id": "test"}]} - assert response.json()["created_at"] == mock.ANY - assert response.json()["last_modified_at"] == "2022-01-02T00:00:00Z" - - def test_rejects_updates_to_readonly_playlist_properties(self): - short_id = self.client.post(f"/api/projects/{self.team.id}/session_recording_playlists/").json()["short_id"] - - response = self.client.patch( - f"/api/projects/{self.team.id}/session_recording_playlists/{short_id}", - {"short_id": "something else", "pinned": True}, - ) - - assert response.json()["short_id"] == short_id - assert response.json()["pinned"] - - def test_filters_based_on_params(self): - other_user = User.objects.create_and_join(self.organization, "other@posthog.com", "password") - playlist1 = SessionRecordingPlaylist.objects.create(team=self.team, name="playlist", created_by=self.user) - playlist2 = SessionRecordingPlaylist.objects.create(team=self.team, pinned=True, created_by=self.user) - playlist3 = SessionRecordingPlaylist.objects.create(team=self.team, name="my playlist", created_by=other_user) - - results = self.client.get( - f"/api/projects/{self.team.id}/session_recording_playlists?search=my", - ).json()["results"] - - assert len(results) == 1 - assert results[0]["short_id"] == playlist3.short_id - - results = self.client.get( - f"/api/projects/{self.team.id}/session_recording_playlists?search=playlist", - ).json()["results"] - - assert len(results) == 2 - assert results[0]["short_id"] == playlist3.short_id - assert results[1]["short_id"] == playlist1.short_id - - results = 
self.client.get( - f"/api/projects/{self.team.id}/session_recording_playlists?user=true", - ).json()["results"] - - assert len(results) == 2 - assert results[0]["short_id"] == playlist2.short_id - assert results[1]["short_id"] == playlist1.short_id - - results = self.client.get( - f"/api/projects/{self.team.id}/session_recording_playlists?pinned=true", - ).json()["results"] - - assert len(results) == 1 - assert results[0]["short_id"] == playlist2.short_id - - results = self.client.get( - f"/api/projects/{self.team.id}/session_recording_playlists?created_by={other_user.id}", - ).json()["results"] - - assert len(results) == 1 - assert results[0]["short_id"] == playlist3.short_id - - @patch("ee.session_recordings.session_recording_extensions.object_storage.copy_objects") - def test_get_pinned_recordings_for_playlist(self, mock_copy_objects: MagicMock) -> None: - mock_copy_objects.return_value = 2 - - playlist = SessionRecordingPlaylist.objects.create(team=self.team, name="playlist", created_by=self.user) - - session_one = f"test_fetch_playlist_recordings-session1-{uuid4()}" - session_two = f"test_fetch_playlist_recordings-session2-{uuid4()}" - three_days_ago = (datetime.now() - timedelta(days=3)).replace(tzinfo=UTC) - - produce_replay_summary( - team_id=self.team.id, - session_id=session_one, - distinct_id="123", - first_timestamp=three_days_ago, - last_timestamp=three_days_ago, - ) - - produce_replay_summary( - team_id=self.team.id, - session_id=session_two, - distinct_id="123", - first_timestamp=three_days_ago, - last_timestamp=three_days_ago, - ) - - # Create playlist items - self.client.post( - f"/api/projects/{self.team.id}/session_recording_playlists/{playlist.short_id}/recordings/{session_one}" - ) - self.client.post( - f"/api/projects/{self.team.id}/session_recording_playlists/{playlist.short_id}/recordings/{session_two}" - ) - self.client.post( - f"/api/projects/{self.team.id}/session_recording_playlists/{playlist.short_id}/recordings/session-missing" - ) - - # Test get recordings - result = self.client.get( - f"/api/projects/{self.team.id}/session_recording_playlists/{playlist.short_id}/recordings" - ).json() - assert len(result["results"]) == 2 - assert {x["id"] for x in result["results"]} == {session_one, session_two} - - @patch("ee.session_recordings.session_recording_extensions.object_storage.list_objects") - @patch("ee.session_recordings.session_recording_extensions.object_storage.copy_objects") - def test_fetch_playlist_recordings(self, mock_copy_objects: MagicMock, mock_list_objects: MagicMock) -> None: - # all sessions have been blob ingested and had data to copy into the LTS storage location - mock_copy_objects.return_value = 1 - - playlist1 = SessionRecordingPlaylist.objects.create( - team=self.team, - name="playlist1", - created_by=self.user, - ) - playlist2 = SessionRecordingPlaylist.objects.create( - team=self.team, - name="playlist2", - created_by=self.user, - ) - - session_one = f"test_fetch_playlist_recordings-session1-{uuid4()}" - session_two = f"test_fetch_playlist_recordings-session2-{uuid4()}" - three_days_ago = (datetime.now() - timedelta(days=3)).replace(tzinfo=UTC) - - for session_id in [session_one, session_two]: - produce_replay_summary( - team_id=self.team.id, - session_id=session_id, - distinct_id="123", - first_timestamp=three_days_ago, - last_timestamp=three_days_ago, - ) - - self.client.post( - f"/api/projects/{self.team.id}/session_recording_playlists/{playlist1.short_id}/recordings/{session_one}", - ) - self.client.post( - 
f"/api/projects/{self.team.id}/session_recording_playlists/{playlist1.short_id}/recordings/{session_two}", - ) - self.client.post( - f"/api/projects/{self.team.id}/session_recording_playlists/{playlist2.short_id}/recordings/{session_one}", - ) - - result = self.client.get( - f"/api/projects/{self.team.id}/session_recording_playlists/{playlist1.short_id}/recordings", - ).json() - - assert len(result["results"]) == 2 - assert result["results"][0]["id"] == session_one - assert result["results"][1]["id"] == session_two - - # Test get recordings - result = self.client.get( - f"/api/projects/{self.team.id}/session_recording_playlists/{playlist2.short_id}/recordings", - ).json() - - assert len(result["results"]) == 1 - assert result["results"][0]["id"] == session_one - - def test_add_remove_static_playlist_items(self): - playlist1 = SessionRecordingPlaylist.objects.create( - team=self.team, - name="playlist1", - created_by=self.user, - ) - playlist2 = SessionRecordingPlaylist.objects.create( - team=self.team, - name="playlist2", - created_by=self.user, - ) - - recording1_session_id = "1" - recording2_session_id = "2" - - # Add recording 1 to playlist 1 - result = self.client.post( - f"/api/projects/{self.team.id}/session_recording_playlists/{playlist1.short_id}/recordings/{recording1_session_id}", - ).json() - assert result["success"] - playlist_item = SessionRecordingPlaylistItem.objects.filter( - playlist_id=playlist1.id, session_id=recording1_session_id - ) - assert playlist_item is not None - - # Add recording 2 to playlist 1 - result = self.client.post( - f"/api/projects/{self.team.id}/session_recording_playlists/{playlist1.short_id}/recordings/{recording2_session_id}", - ).json() - assert result["success"] - playlist_item = SessionRecordingPlaylistItem.objects.filter( - playlist_id=playlist1.id, session_id=recording2_session_id - ) - assert playlist_item is not None - - # Add recording 2 to playlist 2 - result = self.client.post( - f"/api/projects/{self.team.id}/session_recording_playlists/{playlist2.short_id}/recordings/{recording2_session_id}", - ).json() - assert result["success"] - playlist_item = SessionRecordingPlaylistItem.objects.filter( - playlist_id=playlist2.id, session_id=recording2_session_id - ) - assert playlist_item is not None - - session_recording_obj_1 = SessionRecording.get_or_build(team=self.team, session_id=recording1_session_id) - assert session_recording_obj_1 - - session_recording_obj_2 = SessionRecording.get_or_build(team=self.team, session_id=recording2_session_id) - assert session_recording_obj_2 - - # Delete playlist items - result = self.client.delete( - f"/api/projects/{self.team.id}/session_recording_playlists/{playlist1.short_id}/recordings/{recording1_session_id}", - ).json() - assert result["success"] - assert ( - SessionRecordingPlaylistItem.objects.filter( - playlist_id=playlist1.id, session_id=recording1_session_id - ).count() - == 0 - ) - result = self.client.delete( - f"/api/projects/{self.team.id}/session_recording_playlists/{playlist1.short_id}/recordings/{recording2_session_id}", - ).json() - assert result["success"] - assert ( - SessionRecordingPlaylistItem.objects.filter( - playlist_id=playlist1.id, session_id=recording2_session_id - ).count() - == 0 - ) - result = self.client.delete( - f"/api/projects/{self.team.id}/session_recording_playlists/{playlist2.short_id}/recordings/{recording2_session_id}", - ).json() - assert result["success"] - assert ( - SessionRecordingPlaylistItem.objects.filter( - playlist_id=playlist2.id, 
session_id=recording1_session_id - ).count() - == 0 - ) diff --git a/ee/settings.py b/ee/settings.py deleted file mode 100644 index 0a2be3cb50..0000000000 --- a/ee/settings.py +++ /dev/null @@ -1,75 +0,0 @@ -""" -Django settings for PostHog Enterprise Edition. -""" - -import os - -from posthog.settings import AUTHENTICATION_BACKENDS, DEBUG, DEMO, SITE_URL -from posthog.settings.utils import get_from_env -from posthog.utils import str_to_bool - -# SSO -AUTHENTICATION_BACKENDS = [ - *AUTHENTICATION_BACKENDS, - "ee.api.authentication.MultitenantSAMLAuth", - "ee.api.authentication.CustomGoogleOAuth2", -] - -# SAML base attributes -SOCIAL_AUTH_SAML_SP_ENTITY_ID = SITE_URL -SOCIAL_AUTH_SAML_SECURITY_CONFIG = { - "wantAttributeStatement": False, # AttributeStatement is optional in the specification - "requestedAuthnContext": False, # do not explicitly request a password login, also allow multifactor and others -} -# Attributes below are required for the SAML integration from social_core to work properly -SOCIAL_AUTH_SAML_SP_PUBLIC_CERT = "" -SOCIAL_AUTH_SAML_SP_PRIVATE_KEY = "" -SOCIAL_AUTH_SAML_ORG_INFO = {"en-US": {"name": "posthog", "displayname": "PostHog", "url": "https://posthog.com"}} -SOCIAL_AUTH_SAML_TECHNICAL_CONTACT = { - "givenName": "PostHog Support", - "emailAddress": "hey@posthog.com", -} -SOCIAL_AUTH_SAML_SUPPORT_CONTACT = SOCIAL_AUTH_SAML_TECHNICAL_CONTACT - - -# Google SSO -SOCIAL_AUTH_GOOGLE_OAUTH2_KEY = os.getenv("SOCIAL_AUTH_GOOGLE_OAUTH2_KEY") -SOCIAL_AUTH_GOOGLE_OAUTH2_SECRET = os.getenv("SOCIAL_AUTH_GOOGLE_OAUTH2_SECRET") -if "SOCIAL_AUTH_GOOGLE_OAUTH2_WHITELISTED_DOMAINS" in os.environ: - SOCIAL_AUTH_GOOGLE_OAUTH2_WHITELISTED_DOMAINS: list[str] = os.environ[ - "SOCIAL_AUTH_GOOGLE_OAUTH2_WHITELISTED_DOMAINS" - ].split(",") -elif DEMO: - # Only PostHog team members can use social auth in the demo environment - # This is because in the demo env social signups get is_staff=True to facilitate instance management - SOCIAL_AUTH_GOOGLE_OAUTH2_WHITELISTED_DOMAINS = ["posthog.com"] - -# Schedule to run column materialization on. Follows crontab syntax. -# Use empty string to prevent from materializing -MATERIALIZE_COLUMNS_SCHEDULE_CRON = get_from_env("MATERIALIZE_COLUMNS_SCHEDULE_CRON", "0 5 * * SAT") -# Minimum query time before a query is considered for optimization by adding materialized columns -MATERIALIZE_COLUMNS_MINIMUM_QUERY_TIME = get_from_env("MATERIALIZE_COLUMNS_MINIMUM_QUERY_TIME", 40000, type_cast=int) -# How many hours backwards to look for queries to optimize -MATERIALIZE_COLUMNS_ANALYSIS_PERIOD_HOURS = get_from_env( - "MATERIALIZE_COLUMNS_ANALYSIS_PERIOD_HOURS", 7 * 24, type_cast=int -) -# How big of a timeframe to backfill when materializing event properties. 0 for no backfilling -MATERIALIZE_COLUMNS_BACKFILL_PERIOD_DAYS = get_from_env("MATERIALIZE_COLUMNS_BACKFILL_PERIOD_DAYS", 0, type_cast=int) -# Maximum number of columns to materialize at once. Avoids running into resource bottlenecks (storage + ingest + backfilling). -MATERIALIZE_COLUMNS_MAX_AT_ONCE = get_from_env("MATERIALIZE_COLUMNS_MAX_AT_ONCE", 100, type_cast=int) - -BILLING_SERVICE_URL = get_from_env("BILLING_SERVICE_URL", "https://billing.posthog.com") - -# Whether to enable the admin portal. Default false for self-hosted, as it can pose security issues if not set up properly.
-ADMIN_PORTAL_ENABLED = get_from_env("ADMIN_PORTAL_ENABLED", DEMO or DEBUG, type_cast=str_to_bool) - -PARALLEL_ASSET_GENERATION_MAX_TIMEOUT_MINUTES = get_from_env( - "PARALLEL_ASSET_GENERATION_MAX_TIMEOUT_MINUTES", 10.0, type_cast=float -) - -HOOK_HOG_FUNCTION_TEAMS = get_from_env("HOOK_HOG_FUNCTION_TEAMS", "", type_cast=str) - -# Assistant -LANGFUSE_PUBLIC_KEY = get_from_env("LANGFUSE_PUBLIC_KEY", "", type_cast=str) -LANGFUSE_SECRET_KEY = get_from_env("LANGFUSE_SECRET_KEY", "", type_cast=str) -LANGFUSE_HOST = get_from_env("LANGFUSE_HOST", "https://us.cloud.langfuse.com", type_cast=str) diff --git a/ee/surveys/summaries/summarize_surveys.py b/ee/surveys/summaries/summarize_surveys.py deleted file mode 100644 index 1b74ca04d6..0000000000 --- a/ee/surveys/summaries/summarize_surveys.py +++ /dev/null @@ -1,137 +0,0 @@ -import json - -import openai - -from datetime import datetime -from typing import Optional, cast - -from posthog.hogql import ast -from posthog.hogql.parser import parse_select -from posthog.hogql_queries.insights.paginators import HogQLHasMorePaginator -from posthog.schema import HogQLQueryResponse -from posthog.utils import get_instance_region - -from prometheus_client import Histogram - -from posthog.api.activity_log import ServerTimingsGathered -from posthog.models import Team, User - -import structlog - -logger = structlog.get_logger(__name__) - -TOKENS_IN_PROMPT_HISTOGRAM = Histogram( - "posthog_survey_summary_tokens_in_prompt_histogram", - "histogram of the number of tokens in the prompt used to generate a survey summary", - buckets=[ - 0, - 10, - 50, - 100, - 500, - 1000, - 2000, - 3000, - 4000, - 5000, - 6000, - 7000, - 8000, - 10000, - 20000, - 30000, - 40000, - 50000, - 100000, - 128000, - float("inf"), - ], -) - - -def prepare_data(query_response: HogQLQueryResponse) -> list[str]: - response_values = [] - properties_list: list[dict] = [json.loads(x[1]) for x in query_response.results] - for props in properties_list: - response_values.extend([value for key, value in props.items() if key.startswith("$survey_response") and value]) - return response_values - - -def summarize_survey_responses( - survey_id: str, question_index: Optional[int], survey_start: datetime, survey_end: datetime, team: Team, user: User -): - timer = ServerTimingsGathered() - - with timer("prepare_query"): - paginator = HogQLHasMorePaginator(limit=100, offset=0) - q = parse_select( - """ - SELECT distinct_id, properties - FROM events - WHERE event == 'survey sent' - AND properties.$survey_id = {survey_id} - -- e.g. 
`$survey_response` or `$survey_response_2` - AND trim(JSONExtractString(properties, {survey_response_property})) != '' - AND timestamp >= {start_date} - AND timestamp <= {end_date} - """, - { - "survey_id": ast.Constant(value=survey_id), - "survey_response_property": ast.Constant( - value=f"$survey_response_{question_index}" if question_index else "$survey_response" - ), - "start_date": ast.Constant(value=survey_start), - "end_date": ast.Constant(value=survey_end), - }, - ) - - with timer("run_query"): - query_response = paginator.execute_hogql_query( - team=team, - query_type="survey_response_list_query", - query=cast(ast.SelectQuery, q), - ) - - with timer("llm_api_prep"): - instance_region = get_instance_region() or "HOBBY" - prepared_data = prepare_data(query_response) - - with timer("openai_completion"): - result = openai.chat.completions.create( - model="gpt-4o-mini", # allows 128k tokens - temperature=0.7, - messages=[ - { - "role": "system", - "content": """ - You are a product manager's assistant. You summarise survey responses from users for the product manager. - You don't do any other tasks. - """, - }, - { - "role": "user", - "content": f"""the survey responses are {prepared_data}.""", - }, - { - "role": "user", - "content": """ - generate a one or two paragraph summary of the survey response. - only summarize, the goal is to identify real user pain points and needs -use bullet points to identify the themes, and highlights of quotes to bring them to life -we're trying to identify what to work on - use as concise and simple language as is possible. - generate no text other than the summary. - the aim is to let people see themes in the responses received. return the text in markdown format without using any paragraph formatting""", - }, - ], - user=f"{instance_region}/{user.pk}", - ) - - usage = result.usage.prompt_tokens if result.usage else None - if usage: - TOKENS_IN_PROMPT_HISTOGRAM.observe(usage) - - logger.info("survey_summary_response", result=result) - - content: str = result.choices[0].message.content or "" - return {"content": content, "timings": timer.get_all_timings()} diff --git a/ee/tasks/__init__.py b/ee/tasks/__init__.py deleted file mode 100644 index 4bc7933994..0000000000 --- a/ee/tasks/__init__.py +++ /dev/null @@ -1,19 +0,0 @@ -from ee.session_recordings.persistence_tasks import ( - persist_finished_recordings, - persist_single_recording, -) -from .subscriptions import ( - deliver_subscription_report, - handle_subscription_value_change, - schedule_all_subscriptions, -) - -# As our EE tasks are not included at startup for Celery, we need to ensure they are declared here so that they are imported by posthog/settings/celery.py - -__all__ = [ - "persist_single_recording", - "persist_finished_recordings", - "schedule_all_subscriptions", - "deliver_subscription_report", - "handle_subscription_value_change", -] diff --git a/ee/tasks/auto_rollback_feature_flag.py b/ee/tasks/auto_rollback_feature_flag.py deleted file mode 100644 index f676f91d0c..0000000000 --- a/ee/tasks/auto_rollback_feature_flag.py +++ /dev/null @@ -1,85 +0,0 @@ -from datetime import datetime, timedelta -from zoneinfo import ZoneInfo - -from celery import shared_task - -from ee.api.sentry_stats import get_stats_for_timerange -from posthog.models.feature_flag import FeatureFlag -from posthog.models.filters.filter import Filter -from posthog.models.team import Team -from posthog.queries.trends.trends import Trends - - -def check_flags_to_rollback(): - flags_with_threshold = 
FeatureFlag.objects.exclude(rollback_conditions__isnull=True).exclude( - rollback_conditions__exact=[] - ) - - for feature_flag in flags_with_threshold: - check_feature_flag_rollback_conditions(feature_flag_id=feature_flag.pk) - - -@shared_task(ignore_result=True, max_retries=2) -def check_feature_flag_rollback_conditions(feature_flag_id: int) -> None: - flag: FeatureFlag = FeatureFlag.objects.get(pk=feature_flag_id) - - if any(check_condition(condition, flag) for condition in flag.rollback_conditions): - flag.performed_rollback = True - flag.active = False - flag.save() - - -def calculate_rolling_average(threshold_metric: dict, team: Team, timezone: str) -> float: - curr = datetime.now(tz=ZoneInfo(timezone)) - - rolling_average_days = 7 - - filter = Filter( - data={ - **threshold_metric, - "date_from": (curr - timedelta(days=rolling_average_days)).strftime("%Y-%m-%d %H:%M:%S.%f"), - "date_to": curr.strftime("%Y-%m-%d %H:%M:%S.%f"), - }, - team=team, - ) - trends_query = Trends() - result = trends_query.run(filter, team) - - if not len(result): - return False - - data = result[0]["data"] - - return sum(data) / rolling_average_days - - -def check_condition(rollback_condition: dict, feature_flag: FeatureFlag) -> bool: - if rollback_condition["threshold_type"] == "sentry": - created_date = feature_flag.created_at - base_start_date = created_date.strftime("%Y-%m-%dT%H:%M:%S") - base_end_date = (created_date + timedelta(days=1)).strftime("%Y-%m-%dT%H:%M:%S") - - current_time = datetime.utcnow() - target_end_date = current_time.strftime("%Y-%m-%dT%H:%M:%S") - target_start_date = (current_time - timedelta(days=1)).strftime("%Y-%m-%dT%H:%M:%S") - - base, target = get_stats_for_timerange(base_start_date, base_end_date, target_start_date, target_end_date) - - if rollback_condition["operator"] == "lt": - return target < float(rollback_condition["threshold"]) * base - else: - return target > float(rollback_condition["threshold"]) * base - - elif rollback_condition["threshold_type"] == "insight": - rolling_average = calculate_rolling_average( - rollback_condition["threshold_metric"], - feature_flag.team, - feature_flag.team.timezone, - ) - - if rollback_condition["operator"] == "lt": - return rolling_average < rollback_condition["threshold"] - else: - return rolling_average > rollback_condition["threshold"] - - return False diff --git a/ee/tasks/materialized_columns.py b/ee/tasks/materialized_columns.py deleted file mode 100644 index 98091c3b1d..0000000000 --- a/ee/tasks/materialized_columns.py +++ /dev/null @@ -1,60 +0,0 @@ -from collections.abc import Iterator -from dataclasses import dataclass -from celery.utils.log import get_task_logger -from clickhouse_driver import Client - -from ee.clickhouse.materialized_columns.columns import MaterializedColumn, get_cluster, tables as table_infos -from posthog.client import sync_execute -from posthog.settings import CLICKHOUSE_DATABASE -from posthog.clickhouse.materialized_columns import ColumnName, TablesWithMaterializedColumns - -logger = get_task_logger(__name__) - - -@dataclass -class MarkMaterializedTask: - table: str - column: MaterializedColumn - - def execute(self, client: Client) -> None: - expression, parameters = self.column.get_expression_and_parameters() - client.execute( - f"ALTER TABLE {self.table} MODIFY COLUMN {self.column.name} {self.column.type} MATERIALIZED {expression}", - parameters, - ) - - -def mark_all_materialized() -> None: - cluster = get_cluster() - - for table_name, column in get_materialized_columns_with_default_expression(): - 
table_info = table_infos[table_name] - table_info.map_data_nodes( - cluster, - MarkMaterializedTask( - table_info.data_table, - column, - ).execute, - ).result() - - -def get_materialized_columns_with_default_expression() -> Iterator[tuple[str, MaterializedColumn]]: - table_names: list[TablesWithMaterializedColumns] = ["events", "person"] - for table_name in table_names: - for column in MaterializedColumn.get_all(table_name): - if is_default_expression(table_name, column.name): - yield table_name, column - - -def any_ongoing_mutations() -> bool: - running_mutations_count = sync_execute("SELECT count(*) FROM system.mutations WHERE is_done = 0")[0][0] - return running_mutations_count > 0 - - -def is_default_expression(table: str, column_name: ColumnName) -> bool: - updated_table = "sharded_events" if table == "events" else table - column_query = sync_execute( - "SELECT default_kind FROM system.columns WHERE table = %(table)s AND name = %(name)s AND database = %(database)s", - {"table": updated_table, "name": column_name, "database": CLICKHOUSE_DATABASE}, - ) - return len(column_query) > 0 and column_query[0][0] == "DEFAULT" diff --git a/ee/tasks/send_license_usage.py b/ee/tasks/send_license_usage.py deleted file mode 100644 index 72ad3f171c..0000000000 --- a/ee/tasks/send_license_usage.py +++ /dev/null @@ -1,103 +0,0 @@ -import posthoganalytics -import requests -from dateutil.relativedelta import relativedelta -from django.utils import timezone -from django.utils.timezone import now - -from ee.models.license import License -from posthog.client import sync_execute -from posthog.models import User -from posthog.settings import SITE_URL - - -def send_license_usage(): - license = License.objects.first_valid() - user = User.objects.filter(is_active=True).first() - - if not license: - return - - # New type of license key for billing - if license.is_v2_license: - return - - try: - date_from = (timezone.now() - relativedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0) - date_to = timezone.now().replace(hour=0, minute=0, second=0, microsecond=0) - - events_count = sync_execute( - "select count(1) from events where timestamp >= %(date_from)s and timestamp < %(date_to)s and not startsWith(event, '$$')", - {"date_from": date_from, "date_to": date_to}, - )[0][0] - response = requests.post( - "https://license.posthog.com/licenses/usage", - data={ - "date": date_from.strftime("%Y-%m-%d"), - "key": license.key, - "events_count": events_count, - }, - ) - - if response.status_code == 404 and response.json().get("code") == "not_found": - license.valid_until = now() - relativedelta(hours=1) - license.save() - - if response.status_code == 400 and response.json().get("code") == "already_sent": - return - - if response.json().get("valid_until"): - license.valid_until = response.json()["valid_until"] - license.save() - - if not response.ok: - posthoganalytics.capture( - user.distinct_id, # type: ignore - "send license usage data error", - { - "error": response.content, - "status_code": response.status_code, - "date": date_from.strftime("%Y-%m-%d"), - "events_count": events_count, - "organization_name": user.current_organization.name, # type: ignore - }, - groups={ - "organization": str(user.current_organization.id), # type: ignore - "instance": SITE_URL, - }, - ) - response.raise_for_status() - return - else: - posthoganalytics.capture( - user.distinct_id, # type: ignore - "send license usage data", - { - "date": date_from.strftime("%Y-%m-%d"), - "events_count": events_count, - "license_keys": 
[license.key for license in License.objects.all()], - "organization_name": user.current_organization.name, # type: ignore - }, - groups={ - "organization": str(user.current_organization.id), # type: ignore - "instance": SITE_URL, - }, - ) - except Exception as err: - try: - posthoganalytics.capture( - user.distinct_id, # type: ignore - "send license usage data error", - { - "error": str(err), - "date": date_from.strftime("%Y-%m-%d"), - "organization_name": user.current_organization.name, # type: ignore - }, - groups={ - "organization": str(user.current_organization.id), # type: ignore - "instance": SITE_URL, - }, - ) - raise err - except: - # If the posthoganalytics call errors, just throw the original error rather than that error - raise err diff --git a/ee/tasks/slack.py b/ee/tasks/slack.py deleted file mode 100644 index c3e2a41422..0000000000 --- a/ee/tasks/slack.py +++ /dev/null @@ -1,103 +0,0 @@ -import re -from typing import Any -from urllib.parse import urlparse - -import structlog -from django.conf import settings - -from ee.tasks.subscriptions.subscription_utils import generate_assets -from posthog.models.exported_asset import ExportedAsset -from posthog.models.integration import Integration, SlackIntegration -from posthog.models.sharing_configuration import SharingConfiguration - -logger = structlog.get_logger(__name__) - - -SHARED_LINK_REGEX = r"\/(?:shared_dashboard|shared|embedded)\/(.+)" - - -def _block_for_asset(asset: ExportedAsset) -> dict: - image_url = asset.get_public_content_url() - alt_text = None - if asset.insight: - alt_text = asset.insight.name or asset.insight.derived_name - - if settings.DEBUG: - image_url = "https://source.unsplash.com/random" - - return {"type": "image", "image_url": image_url, "alt_text": alt_text} - - -def _handle_slack_event(event_payload: Any) -> None: - slack_team_id = event_payload.get("team_id") - channel = event_payload.get("event").get("channel") - message_ts = event_payload.get("event").get("message_ts") - unfurl_id = event_payload.get("event").get("unfurl_id") - source = event_payload.get("event").get("source") - links_to_unfurl = event_payload.get("event").get("links") - - unfurls = {} - - for link_obj in links_to_unfurl: - link = link_obj.get("url") - parsed = urlparse(link) - matches = re.search(SHARED_LINK_REGEX, parsed.path) - - if matches: - share_token = matches[1] - - # First we try and get the sharingconfig for the given link - try: - sharing_config: SharingConfiguration = SharingConfiguration.objects.get( - access_token=share_token, enabled=True - ) - except SharingConfiguration.DoesNotExist: - logger.info("No SharingConfiguration found") - continue - - team_id = sharing_config.team_id - - # Now we try and get the SlackIntegration for the specificed PostHog team and Slack Team - try: - integration = Integration.objects.get(kind="slack", team=team_id, config__team__id=slack_team_id) - slack_integration = SlackIntegration(integration) - - except Integration.DoesNotExist: - logger.info("No SlackIntegration found for this team") - continue - - # With both the integration and the resource we are good to go!! 
- - insights, assets = generate_assets(sharing_config, 1) - - if assets: - unfurls[link] = { - "blocks": [ - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": insights[0].name or insights[0].derived_name, - }, - "accessory": _block_for_asset(assets[0]), - } - ] - } - - if unfurls: - try: - slack_integration.client.chat_unfurl(unfurls=unfurls, unfurl_id=unfurl_id, source=source, channel="", ts="") - except Exception: - # NOTE: This is temporary as a test to understand if the channel and ts are actually required as the docs are not clear - slack_integration.client.chat_unfurl( - unfurls=unfurls, - unfurl_id=unfurl_id, - source=source, - channel=channel, - ts=message_ts, - ) - raise - - -def handle_slack_event(payload: Any) -> None: - return _handle_slack_event(payload) diff --git a/ee/tasks/subscriptions/__init__.py b/ee/tasks/subscriptions/__init__.py deleted file mode 100644 index 7ca0e06e6d..0000000000 --- a/ee/tasks/subscriptions/__init__.py +++ /dev/null @@ -1,169 +0,0 @@ -from datetime import datetime, timedelta -from typing import Optional - -import structlog -from celery import shared_task -from prometheus_client import Counter -from sentry_sdk import capture_exception, capture_message - -from ee.tasks.subscriptions.email_subscriptions import send_email_subscription_report -from ee.tasks.subscriptions.slack_subscriptions import send_slack_subscription_report -from ee.tasks.subscriptions.subscription_utils import generate_assets -from posthog import settings -from posthog.models.subscription import Subscription -from posthog.tasks.utils import CeleryQueue - -logger = structlog.get_logger(__name__) - -SUBSCRIPTION_QUEUED = Counter( - "subscription_queued", - "A subscription was queued for delivery", - labelnames=["destination"], -) -SUBSCRIPTION_SUCCESS = Counter( - "subscription_send_success", - "A subscription was sent successfully", - labelnames=["destination"], -) -SUBSCRIPTION_FAILURE = Counter( - "subscription_send_failure", - "A subscription failed to send", - labelnames=["destination"], -) - - -def _deliver_subscription_report( - subscription_id: int, - previous_value: Optional[str] = None, - invite_message: Optional[str] = None, -) -> None: - subscription = ( - Subscription.objects.prefetch_related("dashboard__insights") - .select_related("created_by", "insight", "dashboard") - .get(pk=subscription_id) - ) - - is_new_subscription_target = False - if previous_value is not None: - # If previous_value is set we are triggering a "new" or "invite" message - is_new_subscription_target = subscription.target_value != previous_value - - if not is_new_subscription_target: - # Same value as before so nothing to do - return - - insights, assets = generate_assets(subscription) - - if not assets: - capture_message( - "No assets are in this subscription", - tags={"subscription_id": subscription.id}, - ) - return - - if subscription.target_type == "email": - SUBSCRIPTION_QUEUED.labels(destination="email").inc() - - # Send emails - emails = subscription.target_value.split(",") - if is_new_subscription_target: - previous_emails = previous_value.split(",") if previous_value else [] - emails = list(set(emails) - set(previous_emails)) - - for email in emails: - try: - send_email_subscription_report( - email, - subscription, - assets, - invite_message=invite_message or "" if is_new_subscription_target else None, - total_asset_count=len(insights), - ) - except Exception as e: - SUBSCRIPTION_FAILURE.labels(destination="email").inc() - logger.error( - "sending subscription failed", - 
subscription_id=subscription.id, - next_delivery_date=subscription.next_delivery_date, - destination=subscription.target_type, - exc_info=True, - ) - capture_exception(e) - - SUBSCRIPTION_SUCCESS.labels(destination="email").inc() - - elif subscription.target_type == "slack": - SUBSCRIPTION_QUEUED.labels(destination="slack").inc() - - try: - send_slack_subscription_report( - subscription, - assets, - total_asset_count=len(insights), - is_new_subscription=is_new_subscription_target, - ) - SUBSCRIPTION_SUCCESS.labels(destination="slack").inc() - except Exception as e: - SUBSCRIPTION_FAILURE.labels(destination="slack").inc() - logger.error( - "sending subscription failed", - subscription_id=subscription.id, - next_delivery_date=subscription.next_delivery_date, - destination=subscription.target_type, - exc_info=True, - ) - capture_exception(e) - else: - raise NotImplementedError(f"{subscription.target_type} is not supported") - - if not is_new_subscription_target: - subscription.set_next_delivery_date(subscription.next_delivery_date) - subscription.save(update_fields=["next_delivery_date"]) - - -@shared_task(queue=CeleryQueue.SUBSCRIPTION_DELIVERY.value) -def schedule_all_subscriptions() -> None: - """ - Schedule all past notifications (with a buffer) to be delivered - NOTE: This task is scheduled hourly just before the hour allowing for the 15 minute timedelta to cover - all upcoming hourly scheduled subscriptions - """ - now_with_buffer = datetime.utcnow() + timedelta(minutes=15) - subscriptions = ( - Subscription.objects.filter(next_delivery_date__lte=now_with_buffer, deleted=False) - .exclude(dashboard__deleted=True) - .exclude(insight__deleted=True) - .all() - ) - - for subscription in subscriptions: - logger.info( - "Scheduling subscription", - subscription_id=subscription.id, - next_delivery_date=subscription.next_delivery_date, - destination=subscription.target_type, - ) - deliver_subscription_report.delay(subscription.id) - - -report_timeout_seconds = settings.PARALLEL_ASSET_GENERATION_MAX_TIMEOUT_MINUTES * 60 * 1.5 - - -@shared_task( - soft_time_limit=report_timeout_seconds, - time_limit=report_timeout_seconds + 10, - queue=CeleryQueue.SUBSCRIPTION_DELIVERY.value, -) -def deliver_subscription_report(subscription_id: int) -> None: - return _deliver_subscription_report(subscription_id) - - -@shared_task( - soft_time_limit=report_timeout_seconds, - time_limit=report_timeout_seconds + 10, - queue=CeleryQueue.SUBSCRIPTION_DELIVERY.value, -) -def handle_subscription_value_change( - subscription_id: int, previous_value: str, invite_message: Optional[str] = None -) -> None: - return _deliver_subscription_report(subscription_id, previous_value, invite_message) diff --git a/ee/tasks/subscriptions/email_subscriptions.py b/ee/tasks/subscriptions/email_subscriptions.py deleted file mode 100644 index 39e342bcec..0000000000 --- a/ee/tasks/subscriptions/email_subscriptions.py +++ /dev/null @@ -1,67 +0,0 @@ -import uuid -from typing import Optional - -import structlog - -from ee.tasks.subscriptions.subscription_utils import UTM_TAGS_BASE -from posthog.email import EmailMessage -from posthog.models.exported_asset import ExportedAsset -from posthog.models.subscription import Subscription, get_unsubscribe_token -from posthog.utils import absolute_uri - -logger = structlog.get_logger(__name__) - - -def send_email_subscription_report( - email: str, - subscription: Subscription, - assets: list[ExportedAsset], - invite_message: Optional[str] = None, - total_asset_count: Optional[int] = None, -) -> None: - 
utm_tags = f"{UTM_TAGS_BASE}&utm_medium=email" - - inviter = subscription.created_by - is_invite = invite_message is not None - self_invite = inviter.email == email - - subject = "PostHog Report" - invite_summary = None - - resource_info = subscription.resource_info - if not resource_info: - raise NotImplementedError("This type of subscription resource is not supported") - - subject = f"PostHog {resource_info.kind} report - {resource_info.name}" - campaign_key = f"{resource_info.kind.lower()}_subscription_report_{subscription.next_delivery_date.isoformat()}" - - unsubscribe_url = absolute_uri(f"/unsubscribe?token={get_unsubscribe_token(subscription, email)}&{utm_tags}") - - if is_invite: - invite_summary = f"This subscription is { subscription.summary }. The next subscription will be sent on { subscription.next_delivery_date.strftime('%A %B %d, %Y')}" - if self_invite: - subject = f"You have been subscribed to a PostHog {resource_info.kind}" - else: - subject = f"{inviter.first_name or 'Someone'} subscribed you to a PostHog {resource_info.kind}" - campaign_key = f"{resource_info.kind.lower()}_subscription_new_{uuid.uuid4()}" - - message = EmailMessage( - campaign_key=campaign_key, - subject=subject, - template_name="subscription_report", - template_context={ - "images": [x.get_public_content_url() for x in assets], - "resource_noun": resource_info.kind, - "resource_name": resource_info.name, - "resource_url": f"{resource_info.url}?{utm_tags}", - "subscription_url": f"{subscription.url}?{utm_tags}", - "unsubscribe_url": unsubscribe_url, - "inviter": inviter if is_invite else None, - "self_invite": self_invite, - "invite_message": invite_message, - "invite_summary": invite_summary, - "total_asset_count": total_asset_count, - }, - ) - message.add_recipient(email=email) - message.send() diff --git a/ee/tasks/subscriptions/slack_subscriptions.py b/ee/tasks/subscriptions/slack_subscriptions.py deleted file mode 100644 index 73643c7a97..0000000000 --- a/ee/tasks/subscriptions/slack_subscriptions.py +++ /dev/null @@ -1,117 +0,0 @@ -import structlog -from django.conf import settings - -from posthog.models.exported_asset import ExportedAsset -from posthog.models.integration import Integration, SlackIntegration -from posthog.models.subscription import Subscription - -logger = structlog.get_logger(__name__) - -UTM_TAGS_BASE = "utm_source=posthog&utm_campaign=subscription_report" - - -def _block_for_asset(asset: ExportedAsset) -> dict: - image_url = asset.get_public_content_url() - alt_text = None - if asset.insight: - alt_text = asset.insight.name or asset.insight.derived_name - - if settings.DEBUG: - image_url = "https://source.unsplash.com/random" - - return {"type": "image", "image_url": image_url, "alt_text": alt_text} - - -def send_slack_subscription_report( - subscription: Subscription, - assets: list[ExportedAsset], - total_asset_count: int, - is_new_subscription: bool = False, -) -> None: - utm_tags = f"{UTM_TAGS_BASE}&utm_medium=slack" - - resource_info = subscription.resource_info - if not resource_info: - raise NotImplementedError("This type of subscription resource is not supported") - - integration = Integration.objects.filter(team=subscription.team, kind="slack").first() - - if not integration: - # TODO: Write error to subscription... 
- logger.error("No Slack integration found for team...") - return - - slack_integration = SlackIntegration(integration) - - channel = subscription.target_value.split("|")[0] - - first_asset, *other_assets = assets - - if is_new_subscription: - title = f"This channel has been subscribed to the {resource_info.kind} *{resource_info.name}* on PostHog! πŸŽ‰" - title += f"\nThis subscription is {subscription.summary}. The next one will be sent on {subscription.next_delivery_date.strftime('%A %B %d, %Y')}" - else: - title = f"Your subscription to the {resource_info.kind} *{resource_info.name}* is ready! πŸŽ‰" - - blocks = [] - - blocks.extend( - [ - {"type": "section", "text": {"type": "mrkdwn", "text": title}}, - _block_for_asset(first_asset), - ] - ) - - if other_assets: - blocks.append( - { - "type": "section", - "text": {"type": "mrkdwn", "text": "_See 🧡 for more Insights_"}, - } - ) - - blocks.extend( - [ - {"type": "divider"}, - { - "type": "actions", - "elements": [ - { - "type": "button", - "text": {"type": "plain_text", "text": "View in PostHog"}, - "url": f"{resource_info.url}?{utm_tags}", - }, - { - "type": "button", - "text": {"type": "plain_text", "text": "Manage Subscription"}, - "url": f"{subscription.url}?{utm_tags}", - }, - ], - }, - ] - ) - - message_res = slack_integration.client.chat_postMessage(channel=channel, blocks=blocks, text=title) - - thread_ts = message_res.get("ts") - - if thread_ts: - for asset in other_assets: - slack_integration.client.chat_postMessage( - channel=channel, thread_ts=thread_ts, blocks=[_block_for_asset(asset)] - ) - - if total_asset_count > len(assets): - slack_integration.client.chat_postMessage( - channel=channel, - thread_ts=thread_ts, - blocks=[ - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": f"Showing {len(assets)} of {total_asset_count} Insights. 
<{resource_info.url}?{utm_tags}|View the rest in PostHog>", - }, - } - ], - ) diff --git a/ee/tasks/subscriptions/subscription_utils.py b/ee/tasks/subscriptions/subscription_utils.py deleted file mode 100644 index eb8afed13c..0000000000 --- a/ee/tasks/subscriptions/subscription_utils.py +++ /dev/null @@ -1,69 +0,0 @@ -import datetime -from typing import Union -from django.conf import settings -import structlog -from celery import chain -from prometheus_client import Histogram - -from posthog.models.dashboard_tile import get_tiles_ordered_by_position -from posthog.models.exported_asset import ExportedAsset -from posthog.models.insight import Insight -from posthog.models.sharing_configuration import SharingConfiguration -from posthog.models.subscription import Subscription -from posthog.tasks import exporter -from posthog.utils import wait_for_parallel_celery_group - -logger = structlog.get_logger(__name__) - -UTM_TAGS_BASE = "utm_source=posthog&utm_campaign=subscription_report" -DEFAULT_MAX_ASSET_COUNT = 6 - -SUBSCRIPTION_ASSET_GENERATION_TIMER = Histogram( - "subscription_asset_generation_duration_seconds", - "Time spent generating assets for a subscription", - buckets=(1, 5, 10, 30, 60, 120, 240, 300, 360, 420, 480, 540, 600, float("inf")), -) - - -def generate_assets( - resource: Union[Subscription, SharingConfiguration], - max_asset_count: int = DEFAULT_MAX_ASSET_COUNT, -) -> tuple[list[Insight], list[ExportedAsset]]: - with SUBSCRIPTION_ASSET_GENERATION_TIMER.time(): - if resource.dashboard: - tiles = get_tiles_ordered_by_position(resource.dashboard) - insights = [tile.insight for tile in tiles if tile.insight] - elif resource.insight: - insights = [resource.insight] - else: - raise Exception("There are no insights to be sent for this Subscription") - - # Create all the assets we need - assets = [ - ExportedAsset( - team=resource.team, - export_format="image/png", - insight=insight, - dashboard=resource.dashboard, - ) - for insight in insights[:max_asset_count] - ] - ExportedAsset.objects.bulk_create(assets) - - if not assets: - return insights, assets - - # Wait for all assets to be exported - tasks = [exporter.export_asset.si(asset.id) for asset in assets] - # run them one after the other, so we don't exhaust celery workers - exports_expire = datetime.datetime.now(tz=datetime.UTC) + datetime.timedelta( - minutes=settings.PARALLEL_ASSET_GENERATION_MAX_TIMEOUT_MINUTES - ) - parallel_job = chain(*tasks).apply_async(expires=exports_expire, retry=False) - - wait_for_parallel_celery_group( - parallel_job, - expires=exports_expire, - ) - - return insights, assets diff --git a/ee/tasks/test/__init__.py b/ee/tasks/test/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/tasks/test/subscriptions/__init__.py b/ee/tasks/test/subscriptions/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ee/tasks/test/subscriptions/subscriptions_test_factory.py b/ee/tasks/test/subscriptions/subscriptions_test_factory.py deleted file mode 100644 index cbe268b76e..0000000000 --- a/ee/tasks/test/subscriptions/subscriptions_test_factory.py +++ /dev/null @@ -1,19 +0,0 @@ -from datetime import datetime -from typing import Any - -from zoneinfo import ZoneInfo - -from posthog.models.subscription import Subscription - - -def create_subscription(**kwargs: Any) -> Subscription: - payload = { - "target_type": "email", - "target_value": "test1@posthog.com,test2@posthog.com", - "frequency": "daily", - "interval": 1, - "start_date": datetime(2022, 1, 1, 9, 
0).replace(tzinfo=ZoneInfo("UTC")), - } - - payload.update(kwargs) - return Subscription.objects.create(**payload) diff --git a/ee/tasks/test/subscriptions/test_email_subscriptions.py b/ee/tasks/test/subscriptions/test_email_subscriptions.py deleted file mode 100644 index dbb6bca116..0000000000 --- a/ee/tasks/test/subscriptions/test_email_subscriptions.py +++ /dev/null @@ -1,98 +0,0 @@ -from unittest.mock import MagicMock, patch - -from freezegun import freeze_time - -from ee.tasks.subscriptions.email_subscriptions import send_email_subscription_report -from ee.tasks.test.subscriptions.subscriptions_test_factory import create_subscription -from posthog.models.dashboard import Dashboard -from posthog.models.exported_asset import ExportedAsset -from posthog.models.insight import Insight -from posthog.models.instance_setting import set_instance_setting -from posthog.models.subscription import Subscription -from posthog.tasks.test.utils_email_tests import mock_email_messages -from posthog.test.base import APIBaseTest - - -def mock_ee_email_messages(MockEmailMessage: MagicMock): - return mock_email_messages(MockEmailMessage, path="ee/tasks/test/__emails__/") - - -@patch("ee.tasks.subscriptions.email_subscriptions.EmailMessage") -@freeze_time("2022-02-02T08:55:00.000Z") -class TestEmailSubscriptionsTasks(APIBaseTest): - subscription: Subscription - dashboard: Dashboard - insight: Insight - asset: ExportedAsset - - def setUp(self) -> None: - self.dashboard = Dashboard.objects.create(team=self.team, name="private dashboard", created_by=self.user) - self.insight = Insight.objects.create(team=self.team, short_id="123456", name="My Test subscription") - - set_instance_setting("EMAIL_HOST", "fake_host") - set_instance_setting("EMAIL_ENABLED", True) - - self.asset = ExportedAsset.objects.create(team=self.team, insight_id=self.insight.id, export_format="image/png") - self.subscription = create_subscription(team=self.team, insight=self.insight, created_by=self.user) - - def test_subscription_delivery(self, MockEmailMessage: MagicMock) -> None: - mocked_email_messages = mock_ee_email_messages(MockEmailMessage) - - send_email_subscription_report("test1@posthog.com", self.subscription, [self.asset]) - - assert len(mocked_email_messages) == 1 - assert mocked_email_messages[0].send.call_count == 1 - assert "is ready!" in mocked_email_messages[0].html_body - assert f"/exporter/export-my-test-subscription.png?token=ey" in mocked_email_messages[0].html_body - - def test_new_subscription_delivery(self, MockEmailMessage: MagicMock) -> None: - mocked_email_messages = mock_ee_email_messages(MockEmailMessage) - - send_email_subscription_report( - "test1@posthog.com", - self.subscription, - [self.asset], - invite_message="My invite message", - ) - - assert len(mocked_email_messages) == 1 - assert mocked_email_messages[0].send.call_count == 1 - - assert f"has subscribed you" in mocked_email_messages[0].html_body - assert "Someone subscribed you to a PostHog Insight" == mocked_email_messages[0].subject - assert "This subscription is sent every day. 
The next subscription will be sent on Wednesday February 02, 2022" in mocked_email_messages[0].html_body - assert "My invite message" in mocked_email_messages[0].html_body - - def test_should_have_different_text_for_self(self, MockEmailMessage: MagicMock) -> None: - mocked_email_messages = mock_ee_email_messages(MockEmailMessage) - - send_email_subscription_report( - self.user.email, - self.subscription, - [self.asset], - invite_message="My invite message", - ) - - assert len(mocked_email_messages) == 1 - assert mocked_email_messages[0].send.call_count == 1 - assert "You have been subscribed" in mocked_email_messages[0].html_body - assert "You have been subscribed to a PostHog Insight" == mocked_email_messages[0].subject - - def test_sends_dashboard_subscription(self, MockEmailMessage: MagicMock) -> None: - mocked_email_messages = mock_ee_email_messages(MockEmailMessage) - - subscription = create_subscription(team=self.team, dashboard=self.dashboard, created_by=self.user) - - send_email_subscription_report( - self.user.email, - subscription, - [self.asset], - invite_message="My invite message", - total_asset_count=10, - ) - - assert len(mocked_email_messages) == 1 - assert mocked_email_messages[0].send.call_count == 1 - assert "You have been subscribed" in mocked_email_messages[0].html_body - assert "You have been subscribed to a PostHog Dashboard" == mocked_email_messages[0].subject - assert f"SHOWING 1 OF 10 DASHBOARD INSIGHTS" in mocked_email_messages[0].html_body diff --git a/ee/tasks/test/subscriptions/test_slack_subscriptions.py b/ee/tasks/test/subscriptions/test_slack_subscriptions.py deleted file mode 100644 index b340843549..0000000000 --- a/ee/tasks/test/subscriptions/test_slack_subscriptions.py +++ /dev/null @@ -1,199 +0,0 @@ -from unittest.mock import MagicMock, patch - -from freezegun import freeze_time - -from ee.tasks.subscriptions.slack_subscriptions import send_slack_subscription_report -from ee.tasks.test.subscriptions.subscriptions_test_factory import create_subscription -from posthog.models.dashboard import Dashboard -from posthog.models.exported_asset import ExportedAsset -from posthog.models.insight import Insight -from posthog.models.integration import Integration -from posthog.models.subscription import Subscription -from posthog.test.base import APIBaseTest - - -@patch("ee.tasks.subscriptions.slack_subscriptions.SlackIntegration") -@freeze_time("2022-02-02T08:30:00.000Z") -class TestSlackSubscriptionsTasks(APIBaseTest): - subscription: Subscription - dashboard: Dashboard - insight: Insight - asset: ExportedAsset - integration: Integration - - def setUp(self) -> None: - self.dashboard = Dashboard.objects.create(team=self.team, name="private dashboard", created_by=self.user) - self.insight = Insight.objects.create(team=self.team, short_id="123456", name="My Test subscription") - self.asset = ExportedAsset.objects.create(team=self.team, insight_id=self.insight.id, export_format="image/png") - self.subscription = create_subscription( - team=self.team, - insight=self.insight, - created_by=self.user, - target_type="slack", - target_value="C12345|#test-channel", - ) - - self.integration = Integration.objects.create(team=self.team, kind="slack") - - def test_subscription_delivery(self, MockSlackIntegration: MagicMock) -> None: - mock_slack_integration = MagicMock() - MockSlackIntegration.return_value = mock_slack_integration - mock_slack_integration.client.chat_postMessage.return_value = {"ts": "1.234"} - - send_slack_subscription_report(self.subscription, [self.asset], 1) - - assert
mock_slack_integration.client.chat_postMessage.call_count == 1 - post_message_calls = mock_slack_integration.client.chat_postMessage.call_args_list - first_call = post_message_calls[0].kwargs - - assert first_call["channel"] == "C12345" - assert first_call["text"] == "Your subscription to the Insight *My Test subscription* is ready! 🎉" - assert first_call["blocks"] == [ - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": "Your subscription to the Insight *My Test subscription* is ready! 🎉", - }, - }, - { - "type": "image", - "image_url": post_message_calls[0].kwargs["blocks"][1]["image_url"], - "alt_text": "My Test subscription", - }, - {"type": "divider"}, - { - "type": "actions", - "elements": [ - { - "type": "button", - "text": {"type": "plain_text", "text": "View in PostHog"}, - "url": "http://localhost:8010/insights/123456?utm_source=posthog&utm_campaign=subscription_report&utm_medium=slack", - }, - { - "type": "button", - "text": {"type": "plain_text", "text": "Manage Subscription"}, - "url": f"http://localhost:8010/insights/123456/subscriptions/{self.subscription.id}?utm_source=posthog&utm_campaign=subscription_report&utm_medium=slack", - }, - ], - }, - ] - - def test_subscription_delivery_new(self, MockSlackIntegration: MagicMock) -> None: - mock_slack_integration = MagicMock() - MockSlackIntegration.return_value = mock_slack_integration - mock_slack_integration.client.chat_postMessage.return_value = {"ts": "1.234"} - - send_slack_subscription_report(self.subscription, [self.asset], 1, is_new_subscription=True) - - assert mock_slack_integration.client.chat_postMessage.call_count == 1 - post_message_calls = mock_slack_integration.client.chat_postMessage.call_args_list - first_call = post_message_calls[0].kwargs - - assert ( - first_call["text"] - == "This channel has been subscribed to the Insight *My Test subscription* on PostHog! 🎉\nThis subscription is sent every day. The next one will be sent on Wednesday February 02, 2022" - ) - - def test_subscription_dashboard_delivery(self, MockSlackIntegration: MagicMock) -> None: - mock_slack_integration = MagicMock() - MockSlackIntegration.return_value = mock_slack_integration - mock_slack_integration.client.chat_postMessage.return_value = {"ts": "1.234"} - - self.subscription = create_subscription( - team=self.team, - dashboard=self.dashboard, - created_by=self.user, - target_type="slack", - target_value="C12345|#test-channel", - ) - - send_slack_subscription_report(self.subscription, [self.asset, self.asset, self.asset], 10) - - assert mock_slack_integration.client.chat_postMessage.call_count == 4 - post_message_calls = mock_slack_integration.client.chat_postMessage.call_args_list - first_call = post_message_calls[0].kwargs - - assert first_call["channel"] == "C12345" - assert first_call["text"] == "Your subscription to the Dashboard *private dashboard* is ready! 🎉" - - assert first_call["blocks"] == [ - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": "Your subscription to the Dashboard *private dashboard* is ready!
🎉", - }, - }, - { - "type": "image", - "image_url": post_message_calls[0].kwargs["blocks"][1]["image_url"], - "alt_text": "My Test subscription", - }, - { - "type": "section", - "text": {"type": "mrkdwn", "text": "_See 🧡 for more Insights_"}, - }, - {"type": "divider"}, - { - "type": "actions", - "elements": [ - { - "type": "button", - "text": {"type": "plain_text", "text": "View in PostHog"}, - "url": f"http://localhost:8010/dashboard/{self.dashboard.id}?utm_source=posthog&utm_campaign=subscription_report&utm_medium=slack", - }, - { - "type": "button", - "text": {"type": "plain_text", "text": "Manage Subscription"}, - "url": f"http://localhost:8010/dashboard/{self.dashboard.id}/subscriptions/{self.subscription.id}?utm_source=posthog&utm_campaign=subscription_report&utm_medium=slack", - }, - ], - }, - ] - - # Second call - other asset - second_call = post_message_calls[1].kwargs - assert second_call["channel"] == "C12345" - assert second_call["thread_ts"] == "1.234" - assert second_call["blocks"] == [ - { - "type": "image", - "image_url": second_call["blocks"][0]["image_url"], - "alt_text": "My Test subscription", - } - ] - - # Third call - other asset - third_call = post_message_calls[2].kwargs - assert third_call["blocks"] == [ - { - "type": "image", - "image_url": third_call["blocks"][0]["image_url"], - "alt_text": "My Test subscription", - } - ] - - # Fourth call - notice that more exists - fourth_call = post_message_calls[3].kwargs - assert fourth_call["blocks"] == [ - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": f"Showing 3 of 10 Insights. <http://localhost:8010/dashboard/{self.dashboard.id}?utm_source=posthog&utm_campaign=subscription_report&utm_medium=slack|View the rest in PostHog>", - }, - } - ] - - def test_subscription_delivery_missing_integration(self, MockSlackIntegration: MagicMock) -> None: - mock_slack_integration = MagicMock() - MockSlackIntegration.return_value = mock_slack_integration - - self.integration.delete() - - send_slack_subscription_report(self.subscription, [self.asset], 1) - - assert mock_slack_integration.client.chat_postMessage.call_count == 0 - - # TODO: Should we perhaps save something to say the Subscription failed?
diff --git a/ee/tasks/test/subscriptions/test_subscriptions.py b/ee/tasks/test/subscriptions/test_subscriptions.py deleted file mode 100644 index 5f6db8011b..0000000000 --- a/ee/tasks/test/subscriptions/test_subscriptions.py +++ /dev/null @@ -1,194 +0,0 @@ -from datetime import datetime -from unittest.mock import MagicMock, call, patch - -from zoneinfo import ZoneInfo -from freezegun import freeze_time - -from ee.tasks.subscriptions import ( - deliver_subscription_report, - handle_subscription_value_change, - schedule_all_subscriptions, -) -from ee.tasks.test.subscriptions.subscriptions_test_factory import create_subscription -from posthog.models.dashboard import Dashboard -from posthog.models.dashboard_tile import DashboardTile -from posthog.models.exported_asset import ExportedAsset -from posthog.models.insight import Insight -from posthog.models.instance_setting import set_instance_setting -from posthog.test.base import APIBaseTest - - -@patch("ee.tasks.subscriptions.send_slack_subscription_report") -@patch("ee.tasks.subscriptions.send_email_subscription_report") -@patch("ee.tasks.subscriptions.generate_assets") -@freeze_time("2022-02-02T08:55:00.000Z") -class TestSubscriptionsTasks(APIBaseTest): - dashboard: Dashboard - insight: Insight - tiles: list[DashboardTile] = None # type: ignore - asset: ExportedAsset - - def setUp(self) -> None: - self.dashboard = Dashboard.objects.create(team=self.team, name="private dashboard", created_by=self.user) - self.insight = Insight.objects.create(team=self.team, short_id="123456", name="My Test subscription") - self.asset = ExportedAsset.objects.create(team=self.team, insight_id=self.insight.id, export_format="image/png") - self.tiles = [] - for i in range(10): - insight = Insight.objects.create(team=self.team, short_id=f"{i}23456{i}", name=f"insight {i}") - self.tiles.append(DashboardTile.objects.create(dashboard=self.dashboard, insight=insight)) - - set_instance_setting("EMAIL_HOST", "fake_host") - set_instance_setting("EMAIL_ENABLED", True) - - @patch("ee.tasks.subscriptions.deliver_subscription_report") - def test_subscription_delivery_scheduling( - self, - mock_deliver_task: MagicMock, - mock_gen_assets: MagicMock, - mock_send_email: MagicMock, - mock_send_slack: MagicMock, - ) -> None: - with freeze_time("2022-02-02T08:30:00.000Z"): # Create outside of buffer before running - subscriptions = [ - create_subscription(team=self.team, insight=self.insight, created_by=self.user), - create_subscription(team=self.team, insight=self.insight, created_by=self.user), - create_subscription(team=self.team, dashboard=self.dashboard, created_by=self.user), - create_subscription( - team=self.team, - dashboard=self.dashboard, - created_by=self.user, - deleted=True, - ), - ] - # Modify a subscription to have its target time at least an hour ahead - subscriptions[2].start_date = datetime(2022, 1, 1, 10, 0).replace(tzinfo=ZoneInfo("UTC")) - subscriptions[2].save() - assert subscriptions[2].next_delivery_date == datetime(2022, 2, 2, 10, 0).replace(tzinfo=ZoneInfo("UTC")) - - schedule_all_subscriptions() - - self.assertCountEqual( - mock_deliver_task.delay.call_args_list, [call(subscriptions[0].id), call(subscriptions[1].id)] - ) - - @patch("ee.tasks.subscriptions.deliver_subscription_report") - def test_does_not_schedule_subscription_if_item_is_deleted( - self, - mock_deliver_task: MagicMock, - mock_gen_assets: MagicMock, - mock_send_email: MagicMock, - mock_send_slack: MagicMock, - ) -> None: - create_subscription( - team=self.team, - insight=self.insight, - 
created_by=self.user, - target_type="slack", - target_value="C12345|#test-channel", - ) - - create_subscription( - team=self.team, - dashboard=self.dashboard, - created_by=self.user, - target_type="slack", - target_value="C12345|#test-channel", - ) - - self.insight.deleted = True - self.insight.save() - self.dashboard.deleted = True - self.dashboard.save() - - schedule_all_subscriptions() - - assert mock_deliver_task.delay.call_count == 0 - - def test_deliver_subscription_report_email( - self, - mock_gen_assets: MagicMock, - mock_send_email: MagicMock, - mock_send_slack: MagicMock, - ) -> None: - subscription = create_subscription(team=self.team, insight=self.insight, created_by=self.user) - mock_gen_assets.return_value = [self.insight], [self.asset] - - deliver_subscription_report(subscription.id) - - assert mock_send_email.call_count == 2 - - assert mock_send_email.call_args_list == [ - call( - "test1@posthog.com", - subscription, - [self.asset], - invite_message=None, - total_asset_count=1, - ), - call( - "test2@posthog.com", - subscription, - [self.asset], - invite_message=None, - total_asset_count=1, - ), - ] - - def test_handle_subscription_value_change_email( - self, - mock_gen_assets: MagicMock, - mock_send_email: MagicMock, - mock_send_slack: MagicMock, - ) -> None: - subscription = create_subscription( - team=self.team, - insight=self.insight, - created_by=self.user, - target_value="test_existing@posthog.com,test_new@posthog.com", - ) - mock_gen_assets.return_value = [self.insight], [self.asset] - - handle_subscription_value_change( - subscription.id, - previous_value="test_existing@posthog.com", - invite_message="My invite message", - ) - - assert mock_send_email.call_count == 1 - - assert mock_send_email.call_args_list == [ - call( - "test_new@posthog.com", - subscription, - [self.asset], - invite_message="My invite message", - total_asset_count=1, - ) - ] - - def test_deliver_subscription_report_slack( - self, - mock_gen_assets: MagicMock, - mock_send_email: MagicMock, - mock_send_slack: MagicMock, - ) -> None: - subscription = create_subscription( - team=self.team, - insight=self.insight, - created_by=self.user, - target_type="slack", - target_value="C12345|#test-channel", - ) - mock_gen_assets.return_value = [self.insight], [self.asset] - - deliver_subscription_report(subscription.id) - - assert mock_send_slack.call_count == 1 - assert mock_send_slack.call_args_list == [ - call( - subscription, - [self.asset], - total_asset_count=1, - is_new_subscription=False, - ) - ] diff --git a/ee/tasks/test/subscriptions/test_subscriptions_utils.py b/ee/tasks/test/subscriptions/test_subscriptions_utils.py deleted file mode 100644 index edab23bbfb..0000000000 --- a/ee/tasks/test/subscriptions/test_subscriptions_utils.py +++ /dev/null @@ -1,96 +0,0 @@ -from unittest.mock import MagicMock, patch - -import pytest - -from ee.tasks.subscriptions.subscription_utils import ( - DEFAULT_MAX_ASSET_COUNT, - generate_assets, -) -from ee.tasks.test.subscriptions.subscriptions_test_factory import create_subscription -from posthog.models.dashboard import Dashboard -from posthog.models.dashboard_tile import DashboardTile -from posthog.models.exported_asset import ExportedAsset -from posthog.models.insight import Insight -from posthog.test.base import APIBaseTest - - -@patch("ee.tasks.subscriptions.subscription_utils.chain") -@patch("ee.tasks.subscriptions.subscription_utils.exporter.export_asset") -class TestSubscriptionsTasksUtils(APIBaseTest): - dashboard: Dashboard - insight: Insight - asset: 
ExportedAsset - tiles: list[DashboardTile] - - def setUp(self) -> None: - self.dashboard = Dashboard.objects.create(team=self.team, name="private dashboard", created_by=self.user) - self.insight = Insight.objects.create(team=self.team, short_id="123456", name="My Test subscription") - self.tiles = [] - for i in range(10): - insight = Insight.objects.create(team=self.team, short_id=f"insight-{i}", name="My Test subscription") - self.tiles.append(DashboardTile.objects.create(dashboard=self.dashboard, insight=insight)) - - self.subscription = create_subscription(team=self.team, insight=self.insight, created_by=self.user) - - def test_generate_assets_for_insight(self, mock_export_task: MagicMock, _mock_group: MagicMock) -> None: - with self.settings(PARALLEL_ASSET_GENERATION_MAX_TIMEOUT_MINUTES=1): - insights, assets = generate_assets(self.subscription) - - assert insights == [self.insight] - assert len(assets) == 1 - assert mock_export_task.si.call_count == 1 - - def test_generate_assets_for_dashboard(self, mock_export_task: MagicMock, _mock_group: MagicMock) -> None: - subscription = create_subscription(team=self.team, dashboard=self.dashboard, created_by=self.user) - - with self.settings(PARALLEL_ASSET_GENERATION_MAX_TIMEOUT_MINUTES=1): - insights, assets = generate_assets(subscription) - - assert len(insights) == len(self.tiles) - assert len(assets) == DEFAULT_MAX_ASSET_COUNT - assert mock_export_task.si.call_count == DEFAULT_MAX_ASSET_COUNT - - def test_raises_if_missing_resource(self, _mock_export_task: MagicMock, _mock_group: MagicMock) -> None: - subscription = create_subscription(team=self.team, created_by=self.user) - - with self.settings(PARALLEL_ASSET_GENERATION_MAX_TIMEOUT_MINUTES=1), pytest.raises(Exception) as e: - generate_assets(subscription) - - assert str(e.value) == "There are no insights to be sent for this Subscription" - - def test_excludes_deleted_insights_for_dashboard(self, mock_export_task: MagicMock, _mock_group: MagicMock) -> None: - for i in range(1, 10): - current_tile = self.tiles[i] - if current_tile.insight is None: - continue - current_tile.insight.deleted = True - current_tile.insight.save() - subscription = create_subscription(team=self.team, dashboard=self.dashboard, created_by=self.user) - - with self.settings(PARALLEL_ASSET_GENERATION_MAX_TIMEOUT_MINUTES=1): - insights, assets = generate_assets(subscription) - - assert len(insights) == 1 - assert len(assets) == 1 - assert mock_export_task.si.call_count == 1 - - def test_cancels_children_if_timed_out(self, _mock_export_task: MagicMock, mock_group: MagicMock) -> None: - # mock the group so that its children are never ready, - # and we capture calls to revoke - mock_running_exports = MagicMock() - mock_ready = MagicMock() - running_export_task = MagicMock() - - running_export_task.state = "PENDING" - - mock_ready.return_value = False - mock_group.return_value.apply_async.return_value = mock_running_exports - - mock_running_exports.children = [running_export_task] - mock_running_exports.ready = mock_ready - - with self.settings(PARALLEL_ASSET_GENERATION_MAX_TIMEOUT_MINUTES=0.01), pytest.raises(Exception) as e: - generate_assets(self.subscription) - - assert str(e.value) == "Timed out waiting for celery task to finish" - running_export_task.revoke.assert_called() diff --git a/ee/tasks/test/test_auto_rollback_feature_flag.py b/ee/tasks/test/test_auto_rollback_feature_flag.py deleted file mode 100644 index c9afe25850..0000000000 --- a/ee/tasks/test/test_auto_rollback_feature_flag.py +++ /dev/null @@ -1,205 +0,0 
@@ -from unittest.mock import patch - -from freezegun import freeze_time - -from ee.tasks.auto_rollback_feature_flag import ( - calculate_rolling_average, - check_condition, - check_feature_flag_rollback_conditions, -) -from posthog.models.feature_flag import FeatureFlag -from posthog.test.base import APIBaseTest, ClickhouseTestMixin, _create_event - - -class AutoRollbackTest(ClickhouseTestMixin, APIBaseTest): - def test_calculate_rolling_average(self): - threshold_metric = { - "insight": "trends", - "events": [{"order": 0, "id": "$pageview"}], - } - - with freeze_time("2021-08-21T20:00:00.000Z"): - for _ in range(70): - _create_event( - event="$pageview", - distinct_id="1", - team=self.team, - timestamp="2021-08-21 05:00:00", - properties={"prop": 1}, - ) - - _create_event( - event="$pageview", - distinct_id="1", - team=self.team, - timestamp="2021-08-22 05:00:00", - properties={"prop": 1}, - ) - with freeze_time("2021-08-21T21:00:00.000Z"): - self.assertEqual( - calculate_rolling_average( - threshold_metric=threshold_metric, - team=self.team, - timezone="UTC", - ), - 10, # because we have 70 events in the last 7 days - ) - - with freeze_time("2021-08-22T21:00:00.000Z"): - self.assertEqual( - calculate_rolling_average( - threshold_metric=threshold_metric, - team=self.team, - timezone="UTC", - ), - 20, # because we have 140 events in the last 7 days - ) - - def test_check_condition(self): - rollback_condition = { - "threshold": 10, - "threshold_metric": { - "insight": "trends", - "events": [{"order": 0, "id": "$pageview"}], - }, - "operator": "lt", - "threshold_type": "insight", - } - - flag = FeatureFlag.objects.create( - team=self.team, - created_by=self.user, - key="test-ff", - rollout_percentage=50, - rollback_conditions=[rollback_condition], - ) - - self.assertEqual(check_condition(rollback_condition, flag), True) - - def test_check_condition_valid(self): - rollback_condition = { - "threshold": 15, - "threshold_metric": { - "insight": "trends", - "events": [{"order": 0, "id": "$pageview"}], - }, - "operator": "gt", - "threshold_type": "insight", - } - - for _ in range(70): - _create_event( - event="$pageview", - distinct_id="1", - team=self.team, - timestamp="2021-08-21 00:00:00", - properties={"prop": 1}, - ) - _create_event( - event="$pageview", - distinct_id="1", - team=self.team, - timestamp="2021-08-22 00:00:00", - properties={"prop": 1}, - ) - - with freeze_time("2021-08-21T20:00:00.000Z"): - flag = FeatureFlag.objects.create( - team=self.team, - created_by=self.user, - key="test-ff", - rollout_percentage=50, - rollback_conditions=[rollback_condition], - ) - - with freeze_time("2021-08-21T20:00:00.000Z"): - self.assertEqual(check_condition(rollback_condition, flag), False) - - # Go another day with 0 events - with freeze_time("2021-08-22T20:00:00.000Z"): - self.assertEqual(check_condition(rollback_condition, flag), True) - - def test_feature_flag_rolledback(self): - rollback_condition = { - "threshold": 15, - "threshold_metric": { - "insight": "trends", - "events": [{"order": 0, "id": "$pageview"}], - }, - "operator": "gt", - "threshold_type": "insight", - } - - for _ in range(70): - _create_event( - event="$pageview", - distinct_id="1", - team=self.team, - timestamp="2021-08-21 00:00:00", - properties={"prop": 1}, - ) - _create_event( - event="$pageview", - distinct_id="1", - team=self.team, - timestamp="2021-08-22 00:00:00", - properties={"prop": 1}, - ) - - with freeze_time("2021-08-21T00:00:00.000Z"): - flag = FeatureFlag.objects.create( - team=self.team, - 
created_by=self.user, - key="test-ff", - rollout_percentage=50, - rollback_conditions=[rollback_condition], - ) - - flag = FeatureFlag.objects.get(pk=flag.pk) - self.assertEqual(flag.performed_rollback, None) - self.assertEqual(flag.active, True) - - with freeze_time("2021-08-23T20:00:00.000Z"): - check_feature_flag_rollback_conditions(feature_flag_id=flag.pk) - flag = FeatureFlag.objects.get(pk=flag.pk) - self.assertEqual(flag.performed_rollback, True) - self.assertEqual(flag.active, False) - - @patch("ee.tasks.auto_rollback_feature_flag.get_stats_for_timerange") - def test_check_condition_sentry(self, stats_for_timerange): - rollback_condition = { - "threshold": 1.25, - "threshold_metric": {}, - "operator": "gt", - "threshold_type": "sentry", - } - - with freeze_time("2021-08-21T20:00:00.000Z"): - flag = FeatureFlag.objects.create( - team=self.team, - created_by=self.user, - key="test-ff", - rollout_percentage=50, - rollback_conditions=[rollback_condition], - ) - - stats_for_timerange.return_value = (100, 130) - with freeze_time("2021-08-23T20:00:00.000Z"): - self.assertEqual(check_condition(rollback_condition, flag), True) - stats_for_timerange.assert_called_once_with( - "2021-08-21T20:00:00", - "2021-08-22T20:00:00", - "2021-08-22T20:00:00", - "2021-08-23T20:00:00", - ) - - stats_for_timerange.reset_mock() - stats_for_timerange.return_value = (100, 120) - with freeze_time("2021-08-25T13:00:00.000Z"): - self.assertEqual(check_condition(rollback_condition, flag), False) - stats_for_timerange.assert_called_once_with( - "2021-08-21T20:00:00", - "2021-08-22T20:00:00", - "2021-08-24T13:00:00", - "2021-08-25T13:00:00", - ) diff --git a/ee/tasks/test/test_calculate_cohort.py b/ee/tasks/test/test_calculate_cohort.py deleted file mode 100644 index ed0dd3e429..0000000000 --- a/ee/tasks/test/test_calculate_cohort.py +++ /dev/null @@ -1,541 +0,0 @@ -import json -import urllib.parse -from unittest.mock import patch - -from freezegun import freeze_time - -from posthog.client import sync_execute -from posthog.models.cohort import Cohort -from posthog.models.person import Person -from posthog.tasks.calculate_cohort import insert_cohort_from_insight_filter -from posthog.tasks.test.test_calculate_cohort import calculate_cohort_test_factory -from posthog.test.base import ClickhouseTestMixin, _create_event, _create_person - - -class TestClickhouseCalculateCohort(ClickhouseTestMixin, calculate_cohort_test_factory(_create_event, _create_person)): # type: ignore - @patch("posthog.tasks.calculate_cohort.insert_cohort_from_insight_filter.delay") - def test_create_stickiness_cohort(self, _insert_cohort_from_insight_filter): - _create_person(team_id=self.team.pk, distinct_ids=["blabla"]) - _create_event( - team=self.team, - event="$pageview", - distinct_id="blabla", - properties={"$math_prop": 1}, - timestamp="2021-01-01T12:00:00Z", - ) - response = self.client.post( - f"/api/projects/{self.team.id}/cohorts/?insight=STICKINESS&properties=%5B%5D&interval=day&display=ActionsLineGraph&events=%5B%7B%22id%22%3A%22%24pageview%22%2C%22name%22%3A%22%24pageview%22%2C%22type%22%3A%22events%22%2C%22order%22%3A0%7D%5D&shown_as=Stickiness&date_from=2021-01-01&entity_id=%24pageview&entity_type=events&stickiness_days=1&label=%24pageview", - {"name": "test", "is_static": True}, - ).json() - - cohort_id = response["id"] - - _insert_cohort_from_insight_filter.assert_called_once_with( - cohort_id, - { - "insight": "STICKINESS", - "properties": "[]", - "interval": "day", - "display": "ActionsLineGraph", - "events": 
'[{"id":"$pageview","name":"$pageview","type":"events","order":0}]', - "shown_as": "Stickiness", - "date_from": "2021-01-01", - "entity_id": "$pageview", - "entity_type": "events", - "stickiness_days": "1", - "label": "$pageview", - }, - self.team.pk, - ) - - insert_cohort_from_insight_filter( - cohort_id, - { - "date_from": "2021-01-01", - "events": [ - { - "id": "$pageview", - "type": "events", - "order": 0, - "name": "$pageview", - "custom_name": None, - "math": None, - "math_hogql": None, - "math_property": None, - "math_group_type_index": None, - "properties": [], - } - ], - "insight": "STICKINESS", - "interval": "day", - "selected_interval": 1, - "shown_as": "Stickiness", - "entity_id": "$pageview", - "entity_type": "events", - "entity_math": None, - }, - ) - cohort = Cohort.objects.get(pk=cohort_id) - people = Person.objects.filter(cohort__id=cohort.pk) - self.assertEqual(people.count(), 1) - self.assertEqual(cohort.count, 1) - - @patch("posthog.tasks.calculate_cohort.insert_cohort_from_insight_filter.delay") - def test_create_trends_cohort(self, _insert_cohort_from_insight_filter): - _create_person(team_id=self.team.pk, distinct_ids=["blabla"]) - with freeze_time("2021-01-01 00:06:34"): - _create_event( - team=self.team, - event="$pageview", - distinct_id="blabla", - properties={"$math_prop": 1}, - timestamp="2021-01-01T12:00:00Z", - ) - - with freeze_time("2021-01-02 00:06:34"): - _create_event( - team=self.team, - event="$pageview", - distinct_id="blabla", - properties={"$math_prop": 4}, - timestamp="2021-01-01T12:00:00Z", - ) - - response = self.client.post( - f"/api/projects/{self.team.id}/cohorts/?interval=day&display=ActionsLineGraph&events=%5B%7B%22id%22%3A%22%24pageview%22%2C%22name%22%3A%22%24pageview%22%2C%22type%22%3A%22events%22%2C%22order%22%3A0%7D%5D&properties=%5B%5D&entity_id=%24pageview&entity_type=events&date_from=2021-01-01&date_to=2021-01-01&label=%24pageview", - {"name": "test", "is_static": True}, - ).json() - cohort_id = response["id"] - _insert_cohort_from_insight_filter.assert_called_once_with( - cohort_id, - { - "interval": "day", - "display": "ActionsLineGraph", - "events": '[{"id":"$pageview","name":"$pageview","type":"events","order":0}]', - "properties": "[]", - "entity_id": "$pageview", - "entity_type": "events", - "date_from": "2021-01-01", - "date_to": "2021-01-01", - "label": "$pageview", - }, - self.team.pk, - ) - insert_cohort_from_insight_filter( - cohort_id, - { - "date_from": "2021-01-01", - "date_to": "2021-01-01", - "display": "ActionsLineGraph", - "events": [ - { - "id": "$pageview", - "type": "events", - "order": 0, - "name": "$pageview", - "math": None, - "math_hogql": None, - "math_property": None, - "math_group_type_index": None, - "properties": [], - } - ], - "entity_id": "$pageview", - "entity_type": "events", - "insight": "TRENDS", - "interval": "day", - }, - ) - cohort = Cohort.objects.get(pk=cohort_id) - people = Person.objects.filter(cohort__id=cohort.pk) - self.assertEqual(cohort.errors_calculating, 0) - self.assertEqual( - people.count(), - 1, - { - "a": sync_execute( - "select person_id from person_static_cohort where team_id = {} and cohort_id = {} ".format( - self.team.id, cohort.pk - ) - ), - "b": sync_execute( - "select person_id from person_static_cohort FINAL where team_id = {} and cohort_id = {} ".format( - self.team.id, cohort.pk - ) - ), - }, - ) - self.assertEqual(cohort.count, 1) - - @patch("posthog.tasks.calculate_cohort.insert_cohort_from_insight_filter.delay") - def test_create_trends_cohort_arg_test(self, 
_insert_cohort_from_insight_filter): - # prior to 8124, substitute parameters was called on insight cohorting which caused '%' in LIKE arguments to be interpreted as a missing parameter - - _create_person(team_id=self.team.pk, distinct_ids=["blabla"]) - with freeze_time("2021-01-01 00:06:34"): - _create_event( - team=self.team, - event="$pageview", - distinct_id="blabla", - properties={"$domain": "https://app.posthog.com/123"}, - timestamp="2021-01-01T12:00:00Z", - ) - - with freeze_time("2021-01-02 00:06:34"): - _create_event( - team=self.team, - event="$pageview", - distinct_id="blabla", - properties={"$domain": "https://app.posthog.com/123"}, - timestamp="2021-01-01T12:00:00Z", - ) - - params = { - "date_from": "2021-01-01", - "date_to": "2021-01-01", - "display": "ActionsLineGraph", - "events": json.dumps([{"id": "$pageview", "name": "$pageview", "type": "events", "order": 0}]), - "entity_id": "$pageview", - "entity_type": "events", - "insight": "TRENDS", - "interval": "day", - "properties": json.dumps( - [ - { - "key": "$domain", - "value": "app.posthog.com", - "operator": "icontains", - "type": "event", - } - ] - ), - } - - response = self.client.post( - f"/api/projects/{self.team.id}/cohorts/?{urllib.parse.urlencode(params)}", - {"name": "test", "is_static": True}, - ).json() - cohort_id = response["id"] - - _insert_cohort_from_insight_filter.assert_called_once_with( - cohort_id, - { - "date_from": "2021-01-01", - "date_to": "2021-01-01", - "display": "ActionsLineGraph", - "events": '[{"id": "$pageview", "name": "$pageview", "type": "events", "order": 0}]', - "entity_id": "$pageview", - "entity_type": "events", - "insight": "TRENDS", - "interval": "day", - "properties": '[{"key": "$domain", "value": "app.posthog.com", "operator": "icontains", "type": "event"}]', - }, - self.team.pk, - ) - insert_cohort_from_insight_filter( - cohort_id, - { - "date_from": "2021-01-01", - "date_to": "2021-01-01", - "display": "ActionsLineGraph", - "events": [ - { - "id": "$pageview", - "type": "events", - "order": 0, - "name": "$pageview", - "math": None, - "math_hogql": None, - "math_property": None, - "math_group_type_index": None, - "properties": [], - } - ], - "properties": [ - { - "key": "$domain", - "value": "app.posthog.com", - "operator": "icontains", - "type": "event", - } - ], - "entity_id": "$pageview", - "entity_type": "events", - "insight": "TRENDS", - "interval": "day", - }, - ) - cohort = Cohort.objects.get(pk=cohort_id) - people = Person.objects.filter(cohort__id=cohort.pk) - self.assertEqual(cohort.errors_calculating, 0) - self.assertEqual( - people.count(), - 1, - { - "a": sync_execute( - "select person_id from person_static_cohort where team_id = {} and cohort_id = {} ".format( - self.team.id, cohort.pk - ) - ), - "b": sync_execute( - "select person_id from person_static_cohort FINAL where team_id = {} and cohort_id = {} ".format( - self.team.id, cohort.pk - ) - ), - }, - ) - self.assertEqual(cohort.count, 1) - - @patch("posthog.tasks.calculate_cohort.insert_cohort_from_insight_filter.delay") - def test_create_funnels_cohort(self, _insert_cohort_from_insight_filter): - _create_person(team_id=self.team.pk, distinct_ids=["blabla"]) - with freeze_time("2021-01-01 00:06:34"): - _create_event( - team=self.team, - event="$pageview", - distinct_id="blabla", - properties={"$math_prop": 1}, - timestamp="2021-01-01T12:00:00Z", - ) - - with freeze_time("2021-01-02 00:06:34"): - _create_event( - team=self.team, - event="$another_view", - distinct_id="blabla", - properties={"$math_prop": 4}, - 
timestamp="2021-01-02T12:00:00Z", - ) - - params = { - "insight": "FUNNELS", - "events": json.dumps( - [ - { - "id": "$pageview", - "math": None, - "name": "$pageview", - "type": "events", - "order": 0, - "properties": [], - "math_hogql": None, - "math_property": None, - }, - { - "id": "$another_view", - "math": None, - "name": "$another_view", - "type": "events", - "order": 1, - "properties": [], - "math_hogql": None, - "math_property": None, - }, - ] - ), - "display": "FunnelViz", - "interval": "day", - "layout": "horizontal", - "date_from": "2021-01-01", - "date_to": "2021-01-07", - "funnel_step": 1, - } - - response = self.client.post( - f"/api/projects/{self.team.id}/cohorts/?{urllib.parse.urlencode(params)}", - {"name": "test", "is_static": True}, - ).json() - - cohort_id = response["id"] - - _insert_cohort_from_insight_filter.assert_called_once_with( - cohort_id, - { - "insight": "FUNNELS", - "events": '[{"id": "$pageview", "math": null, "name": "$pageview", "type": "events", "order": 0, "properties": [], "math_hogql": null, "math_property": null}, {"id": "$another_view", "math": null, "name": "$another_view", "type": "events", "order": 1, "properties": [], "math_hogql": null, "math_property": null}]', - "display": "FunnelViz", - "interval": "day", - "layout": "horizontal", - "date_from": "2021-01-01", - "date_to": "2021-01-07", - "funnel_step": "1", - }, - self.team.pk, - ) - - insert_cohort_from_insight_filter(cohort_id, params) - - cohort = Cohort.objects.get(pk=cohort_id) - people = Person.objects.filter(cohort__id=cohort.pk) - self.assertEqual(cohort.errors_calculating, 0) - self.assertEqual(people.count(), 1) - self.assertEqual(cohort.count, 1) - - @patch("posthog.tasks.calculate_cohort.insert_cohort_from_insight_filter.delay") - def test_create_lifecycle_cohort(self, _insert_cohort_from_insight_filter): - def _create_events(data, event="$pageview"): - person_result = [] - for id, timestamps in data: - with freeze_time(timestamps[0]): - person_result.append( - _create_person( - team_id=self.team.pk, - distinct_ids=[id], - properties={ - "name": id, - **({"email": "test@posthog.com"} if id == "p1" else {}), - }, - ) - ) - for timestamp in timestamps: - _create_event(team=self.team, event=event, distinct_id=id, timestamp=timestamp) - return person_result - - people = _create_events( - data=[ - ( - "p1", - [ - "2020-01-11T12:00:00Z", - "2020-01-12T12:00:00Z", - "2020-01-13T12:00:00Z", - "2020-01-15T12:00:00Z", - "2020-01-17T12:00:00Z", - "2020-01-19T12:00:00Z", - ], - ), - ("p2", ["2020-01-09T12:00:00Z", "2020-01-12T12:00:00Z"]), - ("p3", ["2020-01-12T12:00:00Z"]), - ("p4", ["2020-01-15T12:00:00Z"]), - ] - ) - - query_params = { - "date_from": "2020-01-12T00:00:00Z", - "date_to": "2020-01-19T00:00:00Z", - "events": json.dumps([{"id": "$pageview", "type": "events", "order": 0}]), - "insight": "LIFECYCLE", - "interval": "day", - "shown_as": "Lifecycle", - "smoothing_intervals": 1, - "entity_id": "$pageview", - "entity_type": "events", - "entity_math": "total", - "target_date": "2020-01-13", - "entity_order": 0, - "lifecycle_type": "returning", - } - - response = self.client.post( - f"/api/cohort/?{urllib.parse.urlencode(query_params)}", - data={"is_static": True, "name": "lifecycle_static_cohort_returning"}, - ).json() - cohort_id = response["id"] - - _insert_cohort_from_insight_filter.assert_called_once_with( - cohort_id, - { - "date_from": "2020-01-12T00:00:00Z", - "date_to": "2020-01-19T00:00:00Z", - "events": '[{"id": "$pageview", "type": "events", "order": 0}]', - "insight": 
"LIFECYCLE", - "interval": "day", - "shown_as": "Lifecycle", - "smoothing_intervals": "1", - "entity_id": "$pageview", - "entity_type": "events", - "entity_math": "total", - "target_date": "2020-01-13", - "entity_order": "0", - "lifecycle_type": "returning", - }, - self.team.pk, - ) - - insert_cohort_from_insight_filter( - cohort_id, - { - "date_from": "2020-01-12T00:00:00Z", - "date_to": "2020-01-19T00:00:00Z", - "events": [{"id": "$pageview", "type": "events", "order": 0}], - "insight": "LIFECYCLE", - "interval": "day", - "shown_as": "Lifecycle", - "smoothing_intervals": "1", - "entity_id": "$pageview", - "entity_type": "events", - "entity_math": "total", - "target_date": "2020-01-13", - "entity_order": "0", - "lifecycle_type": "returning", - }, - ) - cohort = Cohort.objects.get(pk=response["id"]) - people_result = Person.objects.filter(cohort__id=cohort.pk).values_list("id", flat=True) - self.assertIn(people[0].id, people_result) - - query_params = { - "date_from": "2020-01-12T00:00:00Z", - "date_to": "2020-01-19T00:00:00Z", - "events": json.dumps([{"id": "$pageview", "type": "events", "order": 0}]), - "insight": "LIFECYCLE", - "interval": "day", - "shown_as": "Lifecycle", - "smoothing_intervals": 1, - "entity_id": "$pageview", - "entity_type": "events", - "entity_math": "total", - "target_date": "2020-01-13", - "entity_order": 0, - "lifecycle_type": "dormant", - } - response = self.client.post( - f"/api/cohort/?{urllib.parse.urlencode(query_params)}", - data={"is_static": True, "name": "lifecycle_static_cohort_dormant"}, - ).json() - cohort_id = response["id"] - - _insert_cohort_from_insight_filter.assert_called_with( - cohort_id, - { - "date_from": "2020-01-12T00:00:00Z", - "date_to": "2020-01-19T00:00:00Z", - "events": '[{"id": "$pageview", "type": "events", "order": 0}]', - "insight": "LIFECYCLE", - "interval": "day", - "shown_as": "Lifecycle", - "smoothing_intervals": "1", - "entity_id": "$pageview", - "entity_type": "events", - "entity_math": "total", - "target_date": "2020-01-13", - "entity_order": "0", - "lifecycle_type": "dormant", - }, - self.team.pk, - ) - self.assertEqual(_insert_cohort_from_insight_filter.call_count, 2) - - insert_cohort_from_insight_filter( - cohort_id, - { - "date_from": "2020-01-12T00:00:00Z", - "date_to": "2020-01-19T00:00:00Z", - "events": [{"id": "$pageview", "type": "events", "order": 0}], - "insight": "LIFECYCLE", - "interval": "day", - "shown_as": "Lifecycle", - "smoothing_intervals": "1", - "entity_id": "$pageview", - "entity_type": "events", - "entity_math": "total", - "target_date": "2020-01-13", - "entity_order": "0", - "lifecycle_type": "dormant", - }, - ) - - cohort = Cohort.objects.get(pk=response["id"]) - self.assertEqual(cohort.count, 2) - people_result = Person.objects.filter(cohort__id=cohort.pk).values_list("id", flat=True) - self.assertCountEqual([people[1].id, people[2].id], people_result) diff --git a/ee/tasks/test/test_send_license_usage.py b/ee/tasks/test/test_send_license_usage.py deleted file mode 100644 index 441179c2c3..0000000000 --- a/ee/tasks/test/test_send_license_usage.py +++ /dev/null @@ -1,317 +0,0 @@ -from unittest.mock import ANY, Mock, patch - -from freezegun import freeze_time - -from ee.api.test.base import LicensedTestMixin -from ee.models.license import License -from ee.tasks.send_license_usage import send_license_usage -from posthog.models.team import Team -from posthog.test.base import ( - APIBaseTest, - ClickhouseDestroyTablesMixin, - _create_event, - flush_persons_and_events, -) - - -class 
SendLicenseUsageTest(LicensedTestMixin, ClickhouseDestroyTablesMixin, APIBaseTest): - @freeze_time("2021-10-10T23:01:00Z") - @patch("posthoganalytics.capture") - @patch("requests.post") - def test_send_license_usage(self, mock_post, mock_capture): - self.license.key = "legacy-key" - self.license.save() - team2 = Team.objects.create(organization=self.organization) - _create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-08T14:01:01Z", - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-09T12:01:01Z", - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-09T13:01:01Z", - ) - _create_event( - event="$$internal_metrics_shouldnt_be_billed", - team=self.team, - distinct_id=1, - timestamp="2021-10-09T13:01:01Z", - ) - _create_event( - event="$pageview", - team=team2, - distinct_id=1, - timestamp="2021-10-09T14:01:01Z", - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-10T14:01:01Z", - ) - flush_persons_and_events() - - mockresponse = Mock() - mock_post.return_value = mockresponse - mockresponse.json = lambda: {"ok": True, "valid_until": "2021-11-10T23:01:00Z"} - - send_license_usage() - mock_post.assert_called_once_with( - "https://license.posthog.com/licenses/usage", - data={"date": "2021-10-09", "key": self.license.key, "events_count": 3}, - ) - mock_capture.assert_called_once_with( - self.user.distinct_id, - "send license usage data", - { - "date": "2021-10-09", - "events_count": 3, - "license_keys": [self.license.key], - "organization_name": "Test", - }, - groups={"instance": ANY, "organization": str(self.organization.id)}, - ) - self.assertEqual(License.objects.get().valid_until.isoformat(), "2021-11-10T23:01:00+00:00") - - @freeze_time("2021-10-10T23:01:00Z") - @patch("posthoganalytics.capture") - @patch("ee.tasks.send_license_usage.sync_execute", side_effect=Exception()) - def test_send_license_error(self, mock_post, mock_capture): - self.license.key = "legacy-key" - self.license.save() - - team2 = Team.objects.create(organization=self.organization) - _create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-08T14:01:01Z", - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-09T12:01:01Z", - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-09T13:01:01Z", - ) - _create_event( - event="$$internal_metrics_shouldnt_be_billed", - team=self.team, - distinct_id=1, - timestamp="2021-10-09T13:01:01Z", - ) - _create_event( - event="$pageview", - team=team2, - distinct_id=1, - timestamp="2021-10-09T14:01:01Z", - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-10T14:01:01Z", - ) - flush_persons_and_events() - with self.assertRaises(Exception): - send_license_usage() - mock_capture.assert_called_once_with( - self.user.distinct_id, - "send license usage data error", - {"error": "", "date": "2021-10-09", "organization_name": "Test"}, - groups={"instance": ANY, "organization": str(self.organization.id)}, - ) - - @freeze_time("2021-10-10T23:01:00Z") - @patch("posthoganalytics.capture") - @patch("requests.post") - def test_send_license_usage_already_sent(self, mock_post, mock_capture): - self.license.key = "legacy-key" - self.license.save() - - team2 = Team.objects.create(organization=self.organization) - _create_event( - event="$pageview", - team=self.team, - 
distinct_id=1, - timestamp="2021-10-08T14:01:01Z", - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-09T12:01:01Z", - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-09T13:01:01Z", - ) - _create_event( - event="$$internal_metrics_shouldnt_be_billed", - team=self.team, - distinct_id=1, - timestamp="2021-10-09T13:01:01Z", - ) - _create_event( - event="$pageview", - team=team2, - distinct_id=1, - timestamp="2021-10-09T14:01:01Z", - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-10T14:01:01Z", - ) - mockresponse = Mock() - mock_post.return_value = mockresponse - mockresponse.ok = False - mockresponse.status_code = 400 - mockresponse.json = lambda: { - "code": "already_sent", - "error": "Usage data for this period has already been sent.", - } - flush_persons_and_events() - send_license_usage() - mock_capture.assert_not_called() - - @freeze_time("2021-10-10T23:01:00Z") - @patch("posthoganalytics.capture") - @patch("requests.post") - def test_send_license_not_found(self, mock_post, mock_capture): - self.license.key = "legacy-key" - self.license.save() - - team2 = Team.objects.create(organization=self.organization) - _create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-08T14:01:01Z", - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-09T12:01:01Z", - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-09T13:01:01Z", - ) - _create_event( - event="$$internal_metrics_shouldnt_be_billed", - team=self.team, - distinct_id=1, - timestamp="2021-10-09T13:01:01Z", - ) - _create_event( - event="$pageview", - team=team2, - distinct_id=1, - timestamp="2021-10-09T14:01:01Z", - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-10T14:01:01Z", - ) - flush_persons_and_events() - flush_persons_and_events() - - mockresponse = Mock() - mock_post.return_value = mockresponse - mockresponse.status_code = 404 - mockresponse.ok = False - mockresponse.json = lambda: {"code": "not_found"} - mockresponse.content = "" - - send_license_usage() - - mock_capture.assert_called_once_with( - self.user.distinct_id, - "send license usage data error", - { - "error": "", - "date": "2021-10-09", - "organization_name": "Test", - "status_code": 404, - "events_count": 3, - }, - groups={"instance": ANY, "organization": str(self.organization.id)}, - ) - self.assertEqual(License.objects.get().valid_until.isoformat(), "2021-10-10T22:01:00+00:00") - - @freeze_time("2021-10-10T23:01:00Z") - @patch("posthoganalytics.capture") - @patch("requests.post") - def test_send_license_not_triggered_for_v2_licenses(self, mock_post, mock_capture): - self.license.key = "billing-service::v2-key" - self.license.save() - - send_license_usage() - - assert mock_capture.call_count == 0 - - -class SendLicenseUsageNoLicenseTest(APIBaseTest): - @freeze_time("2021-10-10T23:01:00Z") - @patch("requests.post") - def test_no_license(self, mock_post): - # Same test, we just don't include the LicensedTestMixin so no license - _create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-08T14:01:01Z", - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-09T12:01:01Z", - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-09T13:01:01Z", - ) - 
_create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-09T14:01:01Z", - ) - _create_event( - event="$pageview", - team=self.team, - distinct_id=1, - timestamp="2021-10-10T14:01:01Z", - ) - - flush_persons_and_events() - - send_license_usage() - - self.assertEqual(mock_post.call_count, 0) diff --git a/ee/tasks/test/test_slack.py b/ee/tasks/test/test_slack.py deleted file mode 100644 index 64b227d7d1..0000000000 --- a/ee/tasks/test/test_slack.py +++ /dev/null @@ -1,97 +0,0 @@ -from unittest.mock import MagicMock, patch - -from freezegun import freeze_time - -from ee.tasks.slack import handle_slack_event -from posthog import settings -from posthog.models.dashboard import Dashboard -from posthog.models.exported_asset import ExportedAsset -from posthog.models.insight import Insight -from posthog.models.integration import Integration -from posthog.models.sharing_configuration import SharingConfiguration -from posthog.models.subscription import Subscription -from posthog.test.base import APIBaseTest - - -def create_mock_unfurl_event(team_id: str, links: list[str]): - return { - "token": "XXYYZZ", - "team_id": team_id, - "api_app_id": "AXXXXXXXXX", - "event": { - "type": "link_shared", - "channel": "Cxxxxxx", - "is_bot_user_member": True, - "user": "Uxxxxxxx", - "message_ts": "123456789.9875", - "unfurl_id": "C123456.123456789.987501.1b90fa1278528ce6e2f6c5c2bfa1abc9a41d57d02b29d173f40399c9ffdecf4b", - "event_ts": "123456621.1855", - "source": "conversations_history", - "links": [{"domain": "app.posthog.com", "url": link} for link in links], - }, - "type": "event_callback", - "authed_users": ["UXXXXXXX1", "UXXXXXXX2"], - "event_id": "Ev08MFMKH6", - "event_time": 123456789, - } - - -@patch("ee.tasks.slack.generate_assets") -@patch("ee.tasks.slack.SlackIntegration") -@freeze_time("2022-01-01T12:00:00.000Z") -class TestSlackSubscriptionsTasks(APIBaseTest): - subscription: Subscription - dashboard: Dashboard - insight: Insight - asset: ExportedAsset - integration: Integration - - def setUp(self) -> None: - self.insight = Insight.objects.create(team=self.team, short_id="123456", name="My Test subscription") - self.sharingconfig = SharingConfiguration.objects.create(team=self.team, insight=self.insight, enabled=True) - self.integration = Integration.objects.create(team=self.team, kind="slack", config={"team": {"id": "T12345"}}) - self.asset = ExportedAsset.objects.create(team=self.team, export_format="image/png", insight=self.insight) - - def test_unfurl_event(self, MockSlackIntegration: MagicMock, mock_generate_assets: MagicMock) -> None: - mock_slack_integration = MagicMock() - MockSlackIntegration.return_value = mock_slack_integration - mock_generate_assets.return_value = ([self.insight], [self.asset]) - mock_slack_integration.client.chat_unfurl.return_value = {"ok": "true"} - - handle_slack_event( - create_mock_unfurl_event( - "T12345", - [ - f"{settings.SITE_URL}/shared/{self.sharingconfig.access_token}", - f"{settings.SITE_URL}/shared/not-found", - ], - ) - ) - - assert mock_slack_integration.client.chat_unfurl.call_count == 1 - post_message_calls = mock_slack_integration.client.chat_unfurl.call_args_list - first_call = post_message_calls[0].kwargs - - valid_url = f"{settings.SITE_URL}/shared/{self.sharingconfig.access_token}" - - assert first_call == { - "unfurls": { - valid_url: { - "blocks": [ - { - "type": "section", - "text": {"type": "mrkdwn", "text": "My Test subscription"}, - "accessory": { - "type": "image", - "image_url": 
first_call["unfurls"][valid_url]["blocks"][0]["accessory"]["image_url"], - "alt_text": "My Test subscription", - }, - } - ] - } - }, - "unfurl_id": "C123456.123456789.987501.1b90fa1278528ce6e2f6c5c2bfa1abc9a41d57d02b29d173f40399c9ffdecf4b", - "source": "conversations_history", - "channel": "", - "ts": "", - } diff --git a/ee/test/fixtures/performance_event_fixtures.py b/ee/test/fixtures/performance_event_fixtures.py deleted file mode 100644 index 54723fd5d5..0000000000 --- a/ee/test/fixtures/performance_event_fixtures.py +++ /dev/null @@ -1,47 +0,0 @@ -import uuid -from datetime import datetime -from typing import Optional - -from posthog.kafka_client.client import ClickhouseProducer -from posthog.kafka_client.topics import KAFKA_PERFORMANCE_EVENTS -from posthog.models.performance.sql import PERFORMANCE_EVENT_DATA_TABLE -from posthog.utils import cast_timestamp_or_now - - -def create_performance_event( - team_id: int, - distinct_id: str, - session_id: str, - window_id: str = "window_1", - current_url: str = "https://posthog.com", - timestamp: Optional[datetime] = None, - entry_type="resource", - **kwargs, -) -> str: - timestamp_str = cast_timestamp_or_now(timestamp) - - data = { - "uuid": str(uuid.uuid4()), - "team_id": team_id, - "distinct_id": distinct_id, - "session_id": session_id, - "window_id": window_id, - "pageview_id": window_id, - "current_url": current_url, - "timestamp": timestamp_str, - "entry_type": entry_type, - "name": "https://posthog.com/static/js/1.0.0/PostHog.js", - } - - data.update(kwargs) - - selects = [f"%({x})s" for x in data.keys()] - sql = f""" -INSERT INTO {PERFORMANCE_EVENT_DATA_TABLE()} ({', '.join(data.keys()) }, _timestamp, _offset) -SELECT {', '.join(selects) }, now(), 0 -""" - - p = ClickhouseProducer() - p.produce(sql=sql, topic=KAFKA_PERFORMANCE_EVENTS, data=data) - - return str(uuid) diff --git a/ee/urls.py b/ee/urls.py deleted file mode 100644 index 91b58e0fcb..0000000000 --- a/ee/urls.py +++ /dev/null @@ -1,113 +0,0 @@ -from typing import Any - -from django.conf import settings -from django.contrib import admin -from django.urls import include -from django.urls.conf import path - -from ee.api import integration - -from .api import ( - authentication, - billing, - conversation, - dashboard_collaborator, - explicit_team_member, - feature_flag_role_access, - hooks, - license, - sentry_stats, - subscription, -) -from .api.rbac import organization_resource_access, role -from .session_recordings import session_recording_playlist - - -def extend_api_router() -> None: - from posthog.api import ( - environment_dashboards_router, - environments_router, - legacy_project_dashboards_router, - organizations_router, - project_feature_flags_router, - projects_router, - register_grandfathered_environment_nested_viewset, - router as root_router, - ) - - root_router.register(r"billing", billing.BillingViewset, "billing") - root_router.register(r"license", license.LicenseViewSet) - root_router.register(r"integrations", integration.PublicIntegrationViewSet) - organization_roles_router = organizations_router.register( - r"roles", - role.RoleViewSet, - "organization_roles", - ["organization_id"], - ) - organization_roles_router.register( - r"role_memberships", - role.RoleMembershipViewSet, - "organization_role_memberships", - ["organization_id", "role_id"], - ) - # Start: routes to be deprecated - project_feature_flags_router.register( - r"role_access", - feature_flag_role_access.FeatureFlagRoleAccessViewSet, - "project_feature_flag_role_access", - ["project_id", 
"feature_flag_id"], - ) - organizations_router.register( - r"resource_access", - organization_resource_access.OrganizationResourceAccessViewSet, - "organization_resource_access", - ["organization_id"], - ) - # End: routes to be deprecated - register_grandfathered_environment_nested_viewset(r"hooks", hooks.HookViewSet, "environment_hooks", ["team_id"]) - register_grandfathered_environment_nested_viewset( - r"explicit_members", - explicit_team_member.ExplicitTeamMemberViewSet, - "environment_explicit_members", - ["team_id"], - ) - - environment_dashboards_router.register( - r"collaborators", - dashboard_collaborator.DashboardCollaboratorViewSet, - "environment_dashboard_collaborators", - ["project_id", "dashboard_id"], - ) - legacy_project_dashboards_router.register( - r"collaborators", - dashboard_collaborator.DashboardCollaboratorViewSet, - "project_dashboard_collaborators", - ["project_id", "dashboard_id"], - ) - - register_grandfathered_environment_nested_viewset( - r"subscriptions", subscription.SubscriptionViewSet, "environment_subscriptions", ["team_id"] - ) - projects_router.register( - r"session_recording_playlists", - session_recording_playlist.SessionRecordingPlaylistViewSet, - "project_session_recording_playlists", - ["project_id"], - ) - - environments_router.register( - r"conversations", conversation.ConversationViewSet, "environment_conversations", ["team_id"] - ) - - -# The admin interface is disabled on self-hosted instances, as its misuse can be unsafe -admin_urlpatterns = ( - [path("admin/", include("loginas.urls")), path("admin/", admin.site.urls)] if settings.ADMIN_PORTAL_ENABLED else [] -) - - -urlpatterns: list[Any] = [ - path("api/saml/metadata/", authentication.saml_metadata_view), - path("api/sentry_stats/", sentry_stats.sentry_stats), - *admin_urlpatterns, -] diff --git a/frontend/src/stories/Lemon UI.stories.mdx b/frontend/src/stories/Lemon UI.stories.mdx index 37d8bc7c3b..a599b661e1 100644 --- a/frontend/src/stories/Lemon UI.stories.mdx +++ b/frontend/src/stories/Lemon UI.stories.mdx @@ -14,7 +14,7 @@ Lemon UI has grown as a replacement for [Ant Design](https://ant.design/), from after onboarding our first product designer, Chris. The quality of our UI has been steadily going up since then, but the transition process is not complete yet. -**Your** awareness is needed for us to transition sucessfully. πŸ’ͺ Please **DO NOT** use the following Ant components when building new UI: +**Your** awareness is needed for us to transition successfully. πŸ’ͺ Please **DO NOT** use the following Ant components when building new UI: - `Button` – instead go for `LemonButton` - `Select` – instead go for `LemonSelect`