Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/Add metadata filter #94

Merged
merged 2 commits into from
May 12, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 30 additions & 4 deletions packages/components/nodes/documentloaders/Cheerio/Cheerio.ts
Original file line number Diff line number Diff line change
@@ -31,12 +31,21 @@ class Cheerio_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}

async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const metadata = nodeData.inputs?.metadata

let url = nodeData.inputs?.url as string

var urlPattern = new RegExp(
@@ -50,14 +59,31 @@ class Cheerio_DocumentLoaders implements INode {
) // validate fragment locator

const loader = new CheerioWebBaseLoader(urlPattern.test(url.trim()) ? url.trim() : '')
let docs = []

if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}

if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}

return docs
}
}

33 changes: 29 additions & 4 deletions packages/components/nodes/documentloaders/Csv/Csv.ts
Original file line number Diff line number Diff line change
@@ -41,6 +41,13 @@ class Csv_DocumentLoaders implements INode {
description: 'Extracting a single column',
placeholder: 'Enter column name',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}
@@ -49,17 +56,35 @@ class Csv_DocumentLoaders implements INode {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const csvFileBase64 = nodeData.inputs?.csvFile as string
const columnName = nodeData.inputs?.columnName as string
const metadata = nodeData.inputs?.metadata

const blob = new Blob(getBlob(csvFileBase64))
const loader = new CSVLoader(blob, columnName.trim().length === 0 ? undefined : columnName.trim())
let docs = []

if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}

if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}

return docs
}
}

33 changes: 29 additions & 4 deletions packages/components/nodes/documentloaders/Docx/Docx.ts
Original file line number Diff line number Diff line change
@@ -33,24 +33,49 @@ class Docx_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}

async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const docxFileBase64 = nodeData.inputs?.docxFile as string
const metadata = nodeData.inputs?.metadata

const blob = new Blob(getBlob(docxFileBase64))
const loader = new DocxLoader(blob)
let docs = []

if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}

if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}

return docs
}
}

33 changes: 29 additions & 4 deletions packages/components/nodes/documentloaders/Folder/Folder.ts
Original file line number Diff line number Diff line change
@@ -37,13 +37,21 @@ class Folder_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}

async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const folderPath = nodeData.inputs?.folderPath as string
const metadata = nodeData.inputs?.metadata

const loader = new DirectoryLoader(folderPath, {
'.json': (path) => new JSONLoader(path),
@@ -53,14 +61,31 @@ class Folder_DocumentLoaders implements INode {
// @ts-ignore
'.pdf': (path) => new PDFLoader(path, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
})
let docs = []

if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}

if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}

return docs
}
}

33 changes: 29 additions & 4 deletions packages/components/nodes/documentloaders/Github/Github.ts
Original file line number Diff line number Diff line change
@@ -45,6 +45,13 @@ class Github_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}
@@ -54,6 +61,7 @@ class Github_DocumentLoaders implements INode {
const branch = nodeData.inputs?.branch as string
const accessToken = nodeData.inputs?.accessToken as string
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const metadata = nodeData.inputs?.metadata

const options: GithubRepoLoaderParams = {
branch,
@@ -64,14 +72,31 @@ class Github_DocumentLoaders implements INode {
if (accessToken) options.accessToken = accessToken

const loader = new GithubRepoLoader(repoLink, options)
let docs = []

if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}

if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}

return docs
}
}

33 changes: 29 additions & 4 deletions packages/components/nodes/documentloaders/Json/Json.ts
Original file line number Diff line number Diff line change
@@ -41,6 +41,13 @@ class Json_DocumentLoaders implements INode {
description: 'Extracting multiple pointers',
placeholder: 'Enter pointers name',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}
@@ -49,6 +56,7 @@ class Json_DocumentLoaders implements INode {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const jsonFileBase64 = nodeData.inputs?.jsonFile as string
const pointersName = nodeData.inputs?.pointersName as string
const metadata = nodeData.inputs?.metadata

let pointers: string[] = []
if (pointersName) {
@@ -58,14 +66,31 @@ class Json_DocumentLoaders implements INode {

const blob = new Blob(getBlob(jsonFileBase64))
const loader = new JSONLoader(blob, pointers.length != 0 ? pointers : undefined)
let docs = []

if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}

if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}

return docs
}
}

33 changes: 29 additions & 4 deletions packages/components/nodes/documentloaders/Notion/Notion.ts
Original file line number Diff line number Diff line change
@@ -33,23 +33,48 @@ class Notion_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}

async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const notionFolder = nodeData.inputs?.notionFolder as string
const metadata = nodeData.inputs?.metadata

const loader = new NotionLoader(notionFolder)
let docs = []

if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}

if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}

return docs
}
}

40 changes: 31 additions & 9 deletions packages/components/nodes/documentloaders/Pdf/Pdf.ts
Original file line number Diff line number Diff line change
@@ -49,6 +49,13 @@ class Pdf_DocumentLoaders implements INode {
}
],
default: 'perPage'
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}
@@ -57,30 +64,45 @@ class Pdf_DocumentLoaders implements INode {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const pdfFileBase64 = nodeData.inputs?.pdfFile as string
const usage = nodeData.inputs?.usage as string
const metadata = nodeData.inputs?.metadata

const blob = new Blob(getBlob(pdfFileBase64))

let docs = []
if (usage === 'perFile') {
// @ts-ignore
const loader = new PDFLoader(blob, { splitPages: false, pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}
} else {
// @ts-ignore
const loader = new PDFLoader(blob, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}
}

if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}

return docs
}
}

32 changes: 28 additions & 4 deletions packages/components/nodes/documentloaders/Text/Text.ts
Original file line number Diff line number Diff line change
@@ -33,24 +33,48 @@ class Text_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}

async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const txtFileBase64 = nodeData.inputs?.txtFile as string
const metadata = nodeData.inputs?.metadata

const blob = new Blob(getBlob(txtFileBase64))
const loader = new TextLoader(blob)
let docs = []

if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}

if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}
return docs
}
}

Original file line number Diff line number Diff line change
@@ -50,6 +50,13 @@ class Pinecone_Existing_VectorStores implements INode {
type: 'string',
placeholder: 'my-first-namespace',
optional: true
},
{
label: 'Pinecone Metadata Filter',
name: 'pineconeMetadataFilter',
type: 'json',
optional: true,
additionalParams: true
}
]
this.outputs = [
@@ -71,6 +78,8 @@ class Pinecone_Existing_VectorStores implements INode {
const pineconeEnv = nodeData.inputs?.pineconeEnv as string
const index = nodeData.inputs?.pineconeIndex as string
const pineconeNamespace = nodeData.inputs?.pineconeNamespace as string
const pineconeMetadataFilter = nodeData.inputs?.pineconeMetadataFilter

const embeddings = nodeData.inputs?.embeddings as Embeddings
const output = nodeData.outputs?.output as string

@@ -87,6 +96,10 @@ class Pinecone_Existing_VectorStores implements INode {
}

if (pineconeNamespace) obj.namespace = pineconeNamespace
if (pineconeMetadataFilter) {
const metadatafilter = typeof pineconeMetadataFilter === 'object' ? pineconeMetadataFilter : JSON.parse(pineconeMetadataFilter)
obj.filter = metadatafilter
}

const vectorStore = await PineconeStore.fromExistingIndex(embeddings, obj)

Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { Embeddings } from 'langchain/embeddings/base'
import { getBaseClasses } from '../../../src/utils'
import { SupabaseVectorStore } from 'langchain/vectorstores/supabase'
import { SupabaseLibArgs, SupabaseVectorStore } from 'langchain/vectorstores/supabase'
import { createClient } from '@supabase/supabase-js'

class Supabase_Existing_VectorStores implements INode {
@@ -48,6 +48,13 @@ class Supabase_Existing_VectorStores implements INode {
label: 'Query Name',
name: 'queryName',
type: 'string'
},
{
label: 'Supabase Metadata Filter',
name: 'supabaseMetadataFilter',
type: 'json',
optional: true,
additionalParams: true
}
]
this.outputs = [
@@ -70,15 +77,23 @@ class Supabase_Existing_VectorStores implements INode {
const tableName = nodeData.inputs?.tableName as string
const queryName = nodeData.inputs?.queryName as string
const embeddings = nodeData.inputs?.embeddings as Embeddings
const supabaseMetadataFilter = nodeData.inputs?.supabaseMetadataFilter
const output = nodeData.outputs?.output as string

const client = createClient(supabaseProjUrl, supabaseApiKey)

const vectorStore = await SupabaseVectorStore.fromExistingIndex(embeddings, {
const obj: SupabaseLibArgs = {
client,
tableName: tableName,
queryName: queryName
})
tableName,
queryName
}

if (supabaseMetadataFilter) {
const metadatafilter = typeof supabaseMetadataFilter === 'object' ? supabaseMetadataFilter : JSON.parse(supabaseMetadataFilter)
obj.filter = metadatafilter
}

const vectorStore = await SupabaseVectorStore.fromExistingIndex(embeddings, obj)

if (output === 'retriever') {
const retriever = vectorStore.asRetriever()
8 changes: 8 additions & 0 deletions packages/server/marketplaces/AutoGPT.json
Original file line number Diff line number Diff line change
@@ -481,6 +481,14 @@
"placeholder": "my-first-namespace",
"optional": true,
"id": "pineconeExistingIndex_1-input-pineconeNamespace-string"
},
{
"label": "Pinecone Metadata Filter",
"name": "pineconeMetadataFilter",
"type": "json",
"optional": true,
"additionalParams": true,
"id": "pineconeExistingIndex_1-input-pineconeMetadataFilter-json"
}
],
"inputAnchors": [
8 changes: 8 additions & 0 deletions packages/server/marketplaces/BabyAGI.json
Original file line number Diff line number Diff line change
@@ -117,6 +117,14 @@
"placeholder": "my-first-namespace",
"optional": true,
"id": "pineconeExistingIndex_1-input-pineconeNamespace-string"
},
{
"label": "Pinecone Metadata Filter",
"name": "pineconeMetadataFilter",
"type": "json",
"optional": true,
"additionalParams": true,
"id": "pineconeExistingIndex_1-input-pineconeMetadataFilter-json"
}
],
"inputAnchors": [
Original file line number Diff line number Diff line change
@@ -82,6 +82,14 @@
"type": "file",
"fileType": ".txt",
"id": "textFile_1-input-txtFile-file"
},
{
"label": "Metadata",
"name": "metadata",
"type": "json",
"optional": true,
"additionalParams": true,
"id": "textFile_1-input-metadata-json"
}
],
"inputAnchors": [
8 changes: 8 additions & 0 deletions packages/server/marketplaces/Github Repo QnA.json
Original file line number Diff line number Diff line change
@@ -150,6 +150,14 @@
"placeholder": "<GITHUB_ACCESS_TOKEN>",
"optional": true,
"id": "github_1-input-accessToken-password"
},
{
"label": "Metadata",
"name": "metadata",
"type": "json",
"optional": true,
"additionalParams": true,
"id": "github_1-input-metadata-json"
}
],
"inputAnchors": [
414 changes: 414 additions & 0 deletions packages/server/marketplaces/Metadata Filter Load.json

Large diffs are not rendered by default.

662 changes: 662 additions & 0 deletions packages/server/marketplaces/Metadata Filter Upsert.json

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions packages/server/marketplaces/Multiple VectorDB.json
Original file line number Diff line number Diff line change
@@ -821,6 +821,14 @@
"placeholder": "my-first-namespace",
"optional": true,
"id": "pineconeExistingIndex_1-input-pineconeNamespace-string"
},
{
"label": "Pinecone Metadata Filter",
"name": "pineconeMetadataFilter",
"type": "json",
"optional": true,
"additionalParams": true,
"id": "pineconeExistingIndex_1-input-pineconeMetadataFilter-json"
}
],
"inputAnchors": [
6 changes: 4 additions & 2 deletions packages/server/src/utils/index.ts
Original file line number Diff line number Diff line change
@@ -404,8 +404,10 @@ export const isSameOverrideConfig = (
existingOverrideConfig?: ICommonObject,
newOverrideConfig?: ICommonObject
): boolean => {
// Skip check if its internal call
if (isInternal) return true
if (isInternal) {
if (existingOverrideConfig && Object.keys(existingOverrideConfig).length) return false
return true
}
// If existing and new overrideconfig are the same
if (
existingOverrideConfig &&
64 changes: 64 additions & 0 deletions packages/ui/src/ui-component/json/JsonEditor.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import { useState } from 'react'
import PropTypes from 'prop-types'
import { FormControl } from '@mui/material'
import ReactJson from 'react-json-view'

export const JsonEditorInput = ({ value, onChange, disabled = false, isDarkMode = false }) => {
const [myValue, setMyValue] = useState(value ? JSON.parse(value) : {})

const onClipboardCopy = (e) => {
const src = e.src
if (Array.isArray(src) || typeof src === 'object') {
navigator.clipboard.writeText(JSON.stringify(src, null, ' '))
} else {
navigator.clipboard.writeText(src)
}
}

return (
<>
<FormControl sx={{ mt: 1, width: '100%' }} size='small'>
{disabled && (
<ReactJson
theme={isDarkMode ? 'ocean' : 'rjv-default'}
style={{ padding: 10, borderRadius: 10 }}
src={myValue}
name={null}
enableClipboard={(e) => onClipboardCopy(e)}
quotesOnKeys={false}
displayDataTypes={false}
/>
)}
{!disabled && (
<ReactJson
theme={isDarkMode ? 'ocean' : 'rjv-default'}
style={{ padding: 10, borderRadius: 10 }}
src={myValue}
name={null}
quotesOnKeys={false}
displayDataTypes={false}
enableClipboard={(e) => onClipboardCopy(e)}
onEdit={(edit) => {
setMyValue(edit.updated_src)
onChange(JSON.stringify(edit.updated_src))
}}
onAdd={() => {
//console.log(add)
}}
onDelete={(deleteobj) => {
setMyValue(deleteobj.updated_src)
onChange(JSON.stringify(deleteobj.updated_src))
}}
/>
)}
</FormControl>
</>
)
}

JsonEditorInput.propTypes = {
value: PropTypes.string,
onChange: PropTypes.func,
disabled: PropTypes.bool,
isDarkMode: PropTypes.bool
}
11 changes: 11 additions & 0 deletions packages/ui/src/views/canvas/NodeInputHandler.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import PropTypes from 'prop-types'
import { Handle, Position, useUpdateNodeInternals } from 'reactflow'
import { useEffect, useRef, useState, useContext } from 'react'
import { useSelector } from 'react-redux'

// material-ui
import { useTheme, styled } from '@mui/material/styles'
@@ -15,6 +16,7 @@ import { File } from 'ui-component/file/File'
import { SwitchInput } from 'ui-component/switch/Switch'
import { flowContext } from 'store/context/ReactFlowContext'
import { isValidConnection, getAvailableNodesForVariable } from 'utils/genericHelper'
import { JsonEditorInput } from 'ui-component/json/JsonEditor'

const CustomWidthTooltip = styled(({ className, ...props }) => <Tooltip {...props} classes={{ popper: className }} />)({
[`& .${tooltipClasses.tooltip}`]: {
@@ -26,6 +28,7 @@ const CustomWidthTooltip = styled(({ className, ...props }) => <Tooltip {...prop

const NodeInputHandler = ({ inputAnchor, inputParam, data, disabled = false, isAdditionalParams = false }) => {
const theme = useTheme()
const customization = useSelector((state) => state.customization)
const ref = useRef(null)
const { reactFlowInstance } = useContext(flowContext)
const updateNodeInternals = useUpdateNodeInternals()
@@ -166,6 +169,14 @@ const NodeInputHandler = ({ inputAnchor, inputParam, data, disabled = false, isA
onDialogConfirm={(newValue, inputParamName) => onExpandDialogSave(newValue, inputParamName)}
/>
)}
{inputParam.type === 'json' && (
<JsonEditorInput
disabled={disabled}
onChange={(newValue) => (data.inputs[inputParam.name] = newValue)}
value={data.inputs[inputParam.name] ?? inputParam.default ?? ''}
isDarkMode={customization.isDarkMode}
/>
)}
{inputParam.type === 'options' && (
<Dropdown
disabled={disabled}