forked from plandex-ai/plandex
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4 from ZanzyTHEbar/feature/evals
Feature/evals
- Loading branch information
Showing
33 changed files
with
476 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
- description: "Check Add functionality" | ||
vars: | ||
preBuildState: file://assets/go/code/add.go | ||
changes: file://assets/go/changes/add.changes.md | ||
problems: file://assets/go/problems/add.problems.txt | ||
postBuildState: file://assets/go/code/add.post.go | ||
assert: | ||
- type: is-json | ||
- type: is-valid-openai-tools-call | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return ( | ||
args.problems && | ||
args.changes.length > 0 && | ||
args.changes.some( | ||
change => change.hasChange && change.new.includes("if a == nil || b == nil {") | ||
) | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
- description: "Verify calculateAge function implementation" | ||
vars: | ||
preBuildState: file://assets/js/code/age.js | ||
changes: file://assets/js/changes/age.changes.md | ||
problems: file://assets/js/problems/age.problems.txt | ||
postBuildState: file://assets/js/code/age.post.js | ||
assert: | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return args.problems && args.changes.length > 0 && args.changes.some( | ||
change => change.hasChange and change.new.includes("console.error(\"Birth year cannot be in the future.\")") | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
- description: "Verify fetchData function implementation" | ||
vars: | ||
preBuildState: file://assets/js/code/api.js | ||
changes: file://assets/js/changes/api.changes.md | ||
problems: file://assets/js/problems/api.problems.txt | ||
postBuildState: file://assets/js/code/api.post.js | ||
assert: | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return args.problems && args.changes.length > 0 && args.changes.some( | ||
change => change.hasChange and change.new.includes("if (!response.ok)") | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
- description: "Check Arithmetic functionality" | ||
vars: | ||
preBuildState: file://assets/go/code/arithmetic.go | ||
changes: file://assets/go/changes/arithmetic.changes.md | ||
problems: file://assets/go/problems/arithmetic.problems.txt | ||
postBuildState: file://assets/go/code/arithmetic.post.go | ||
assert: | ||
- type: is-json | ||
- type: is-valid-openai-tools-call | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return ( | ||
args.problems && | ||
args.changes.length > 0 && | ||
args.changes.some( | ||
change => change.hasChange && change.new.includes("fmt.Println(\"Modulus:\", a%b)") | ||
) | ||
) |
19 changes: 19 additions & 0 deletions
19
test/evals/promptfoo-poc/fix/tests/calculatesum.go.test.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
- description: "Check Calculate Sum functionality" | ||
vars: | ||
preBuildState: file://assets/go/code/calculatesum.go | ||
changes: file://assets/go/changes/calculatesum.changes.md | ||
problems: file://assets/go/problems/calculatesum.problems.txt | ||
postBuildState: file://assets/go/code/calculatesum.post.go | ||
assert: | ||
- type: is-json | ||
- type: is-valid-openai-tools-call | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return ( | ||
args.problems && | ||
args.changes.length > 0 && | ||
args.changes.some( | ||
change => change.hasChange && change.new.includes("if numbers == nil {") | ||
) | ||
) |
13 changes: 13 additions & 0 deletions
13
test/evals/promptfoo-poc/fix/tests/dataprocessor.py.test.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
- description: "Verify DataProcessor functionality implementation" | ||
vars: | ||
preBuildState: file://assets/py/code/dataprocessor.py | ||
changes: file://assets/py/changes/dataprocessor.changes.md | ||
problems: file://assets/py/problems/dataprocessor.problems.txt | ||
postBuildState: file://assets/py/code/dataprocessor.post.py | ||
assert: | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return args.problems && args.changes.length > 0 && args.changes.some( | ||
change => change.hasChange and change.new.includes("def processData(self, data):") | ||
) |
2 changes: 1 addition & 1 deletion
2
...vals/promptfoo-poc/fix/tests/fix.test.yml → ...s/promptfoo-poc/fix/tests/fix.go.test.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
19 changes: 19 additions & 0 deletions
19
test/evals/promptfoo-poc/fix/tests/globalconfig.go.test.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
- description: "Check Global Config functionality" | ||
vars: | ||
preBuildState: file://assets/go/code/globalconfig.go | ||
changes: file://assets/go/changes/globalconfig.changes.md | ||
problems: file://assets/go/problems/globalconfig_problems.txt | ||
postBuildState: file://assets/go/code/globalconfig.post.go | ||
assert: | ||
- type: is-json | ||
- type: is-valid-openai-tools-call | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return ( | ||
args.problems && | ||
args.changes.length > 0 && | ||
args.changes.some( | ||
change => change.hasChange && change.new.includes("GlobalConfig holds configuration settings.") | ||
) | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
- description: "Check Hello World functionality" | ||
vars: | ||
preBuildState: file://assets/go/code/hello.go | ||
changes: file://assets/go/changes/hello.changes.md | ||
problems: file://assets/go/problems/hello.problems.txt | ||
postBuildState: file://assets/go/code/hello.post.go | ||
assert: | ||
- type: is-json | ||
- type: is-valid-openai-tools-call | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return ( | ||
args.problems && | ||
args.changes.length > 0 && | ||
args.changes.some( | ||
change => change.hasChange && change.new.includes("func main() {}") | ||
) | ||
) |
13 changes: 13 additions & 0 deletions
13
test/evals/promptfoo-poc/fix/tests/helloworld.java.test.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
- description: "Verify HelloWorld class header and functionality" | ||
vars: | ||
preBuildState: file://assets/java/code/HelloWorld.java | ||
changes: file://assets/java/changes/HelloWorld.changes.md | ||
problems: file://assets/java/problems/HelloWorld.problems.txt | ||
postBuildState: file://assets/java/code/HelloWorld.post.java | ||
assert: | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return args.problems && args.changes.length > 0 && args.changes.some( | ||
change => change.hasChange and change.new.includes("/* This class demonstrates printing \"Hello, World!\" to the console. */") | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
- description: "Verify HTML page title and paragraph updates" | ||
vars: | ||
preBuildState: file://assets/html/code/index.html | ||
changes: file://assets/html/changes/index.changes.md | ||
problems: file://assets/html/problems/index.problems.txt | ||
postBuildState: file://assets/html/code/index.post.html | ||
assert: | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return args.problems && args.changes.length > 0 && args.changes.some( | ||
change => change.hasChange and change.new.includes("<title>Updated Title</title>") | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
- description: "Check Loop functionality" | ||
vars: | ||
preBuildState: file://assets/go/code/loop.go | ||
changes: file://assets/go/changes/loop.changes.md | ||
problems: file://assets/go/problems/loop.problems.txt | ||
postBuildState: file://assets/go/code/loop.post.go | ||
assert: | ||
- type: is-json | ||
- type: is-valid-openai-tools-call | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return ( | ||
args.problems && | ||
args.changes.length > 0 && | ||
args.changes.some( | ||
change => change.hasChange && change.new.includes("for i := 0; i < 10; i++ {") | ||
) | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
- description: "Check Main functionality" | ||
vars: | ||
preBuildState: file://assets/go/code/main.go | ||
changes: file://assets/go/changes/main.changes.md | ||
problems: file://assets/go/problems/main.problems.txt | ||
postBuildState: file://assets/go/code/main.post.go | ||
assert: | ||
- type: is-json | ||
- type: is-valid-openai-tools-call | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return ( | ||
args.problems && | ||
args.changes.length > 0 && | ||
args.changes.some( | ||
change => change.hasChange && change.new.includes('fmt.Println("Goodbye, world!")') | ||
) | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
- description: "Verify DatabaseConnector class implementation" | ||
vars: | ||
preBuildState: file://assets/js/code/main.js | ||
changes: file://assets/js/changes/main.changes.md | ||
problems: file://assets/js/problems/main.problems.txt | ||
postBuildState: file://assets/js/code/main.post.js | ||
assert: | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return args.problems && args.changes.length > 0 && args.changes.some( | ||
change => change.hasChange and change.new.includes("connect()") | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
- description: "Verify main initialization function implementation" | ||
vars: | ||
preBuildState: file://assets/py/code/main.py | ||
changes: file://assets/py/changes/main.changes.md | ||
problems: file://assets/py/problems/main.problems.txt | ||
postBuildState: file://assets/py/code/main.post.py | ||
assert: | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return args.problems && args.changes.length > 0 && args.changes.some( | ||
change => change.hasChange and change.new.includes("def initialize(params):") | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
- description: "Check Print functionality" | ||
vars: | ||
preBuildState: file://assets/go/code/print.go | ||
changes: file://assets/go/changes/print.changes.md | ||
problems: file://assets/go/problems/print.problems.txt | ||
postBuildState: file://assets/go/code/print.post.go | ||
assert: | ||
- type: is-json | ||
- type: is-valid-openai-tools-call | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return ( | ||
args.problems && | ||
args.changes.length > 0 && | ||
args.changes.some( | ||
change => change.hasChange && change.new.includes("// print farewell message") | ||
) | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
- description: "Check Removal functionality" | ||
vars: | ||
preBuildState: file://assets/go/code/removal.go | ||
changes: file://assets/go/changes/removal.changes.md | ||
problems: file://assets/go/problems/removal_problems.txt | ||
postBuildState: file://assets/go/code/removal.post.go | ||
assert: | ||
- type: is-json | ||
- type: is-valid-openai-tools-call | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return ( | ||
args.problems && | ||
args.changes.length > 0 && | ||
args.changes.some( | ||
change => change.hasChange && change.new.includes("deleteIds[context.Id] = true") | ||
) | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
- description: "Verify add functionality implementation" | ||
vars: | ||
preBuildState: file://assets/ts/code/add.ts | ||
changes: file://assets/ts/changes/add.changes.md | ||
postBuildState: file://assets/ts/code/add.post.ts | ||
diffs: file://assets/ts/diffs/add.diff.txt | ||
assert: | ||
- type: is-json | ||
- type: is-valid-openai-tools-call | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return args.hasRemovedCodeErrors && args.changes.some( | ||
change => change.hasChange and change.new.includes("function add(a: number, b: number): number") | ||
) |
15 changes: 15 additions & 0 deletions
15
test/evals/promptfoo-poc/verify/tests/calculator.java.test.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
- description: "Verify calculator functionality implementation" | ||
vars: | ||
preBuildState: file://assets/java/code/calculator.java | ||
changes: file://assets/java/changes/calculator.changes.md | ||
postBuildState: file://assets/java/code/calculator.post.java | ||
diffs: file://assets/java/diffs/calculator.diff.txt | ||
assert: | ||
- type: is-json | ||
- type: is-valid-openai-tools-call | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return args.hasRemovedCodeErrors && args.changes.some( | ||
change => change.hasChange && change.new.includes("public int subtract(int a, int b)") | ||
) |
15 changes: 15 additions & 0 deletions
15
test/evals/promptfoo-poc/verify/tests/feature_x.go.test.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
- description: "Verify Feature X implementation" | ||
vars: | ||
preBuildState: file://assets/go/code/feature_x.go | ||
changes: file://assets/go/changes/feature_x.changes.md | ||
postBuildState: file://assets/go/code/feature_x.post.go | ||
diffs: file://assets/go/diffs/feature_x.diff.txt | ||
assert: | ||
- type: is-json | ||
- type: is-valid-openai-tools-call | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return args.hasRemovedCodeErrors && args.changes.some( | ||
change => change.hasChange && change.new.includes("fmt.Println(\"Feature X implemented\")") | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
- description: "Verify Hello function implementation" | ||
vars: | ||
preBuildState: file://assets/go/code/hello.go | ||
changes: file://assets/go/changes/hello.changes.md | ||
postBuildState: file://assets/go/code/hello.post.go | ||
diffs: file://assets/go/diffs/hello.diff.txt | ||
assert: | ||
- type: is-json | ||
- type: is-valid-openai-tools-call | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return args.hasRemovedCodeErrors && args.changes.some( | ||
change => change.hasChange && change.new.includes("hello(\"World\")") | ||
) |
15 changes: 15 additions & 0 deletions
15
test/evals/promptfoo-poc/verify/tests/hello_date.go.test.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
- description: "Verify Hello Date function implementation" | ||
vars: | ||
preBuildState: file://assets/go/code/hello_date.go | ||
changes: file://assets/go/changes/hello_date.changes.md | ||
postBuildState: file://assets/go/code/hello_date.post.go | ||
diffs: file://assets/go/diffs/hello_date.diff.txt | ||
assert: | ||
- type: is-json | ||
- type: is-valid-openai-tools-call | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return args.hasRemovedCodeErrors && args.changes.some( | ||
change => change.hasChange && change.new.includes("fmt.Println(\"Hello, World! Current date:\", current_time)") | ||
) |
15 changes: 15 additions & 0 deletions
15
test/evals/promptfoo-poc/verify/tests/hello_universe.go.test.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
- description: "Verify Hello Universe function implementation" | ||
vars: | ||
preBuildState: file://assets/go/code/hello_universe.go | ||
changes: file://assets/go/changes/hello_universe.changes.md | ||
postBuildState: file://assets/go/code/hello_universe.post.go | ||
diffs: file://assets/go/diffs/hello_universe.diff.txt | ||
assert: | ||
- type: is-json | ||
- type: is-valid-openai-tools-call | ||
- type: javascript | ||
value: | | ||
var args = JSON.parse(output[0].function.arguments) | ||
return args.hasRemovedCodeErrors && args.changes.some( | ||
change => change.hasChange && change.new.includes("fmt.Println(\"Hello, Universe!\")") | ||
) |
Oops, something went wrong.