Skip to content

Commit

Permalink
feat: add chunk schema definitions for GitHub source connection, MVP …
Browse files Browse the repository at this point in the history
…support for Repositories and Repository Contents.
  • Loading branch information
lennertjansen committed Jan 28, 2025
1 parent c013dd9 commit 3314a0a
Showing 1 changed file with 77 additions and 0 deletions.
77 changes: 77 additions & 0 deletions backend/app/platform/chunks/github.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""GitHub chunk schemas.
Based on the GitHub REST API (read-only scope), we define chunk schemas for:
• Repository
• Repository Contents
References:
• https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28 (Repositories)
• https://docs.github.com/en/rest/repos/contents?apiVersion=2022-11-28 (Repository contents)
"""

from datetime import datetime
from typing import List, Optional

from pydantic import Field

from app.platform.chunks._base import BaseChunk


class GithubRepoChunk(BaseChunk):
"""Schema for a GitHub repository.
References:
https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28
"""

name: Optional[str] = Field(None, description="Name of the repository.")
full_name: Optional[str] = Field(None, description="Full name (including owner) of the repo.")
owner_login: Optional[str] = Field(None, description="Login/username of the repository owner.")
private: bool = Field(False, description="Whether the repository is private.")
description: Optional[str] = Field(None, description="Short description of the repository.")
fork: bool = Field(False, description="Whether this repository is a fork.")
created_at: Optional[datetime] = Field(None, description="When the repository was created.")
updated_at: Optional[datetime] = Field(
None, description="When the repository was last updated."
)
pushed_at: Optional[datetime] = Field(None, description="When the repository was last pushed.")
homepage: Optional[str] = Field(None, description="Homepage URL for the repository.")
size: Optional[int] = Field(None, description="Size of the repository (in kilobytes).")
stargazers_count: int = Field(0, description="Number of stars on this repository.")
watchers_count: int = Field(0, description="Number of people watching this repository.")
language: Optional[str] = Field(None, description="Primary language of the repository.")
forks_count: int = Field(0, description="Number of forks for this repository.")
open_issues_count: int = Field(0, description="Number of open issues on this repository.")
topics: List[str] = Field(default_factory=list, description="Topics/tags applied to this repo.")
default_branch: Optional[str] = Field(
None, description="Default branch name of the repository."
)
archived: bool = Field(False, description="Whether the repository is archived.")
disabled: bool = Field(False, description="Whether the repository is disabled in GitHub.")


class GithubContentChunk(BaseChunk):
"""Schema for a GitHub repository's content (file, directory, submodule, etc.).
References:
https://docs.github.com/en/rest/repos/contents?apiVersion=2022-11-28
"""

repo_full_name: Optional[str] = Field(None, description="Full name of the parent repository.")
path: Optional[str] = Field(None, description="Path of the file or directory within the repo.")
sha: Optional[str] = Field(None, description="SHA identifier for this content item.")
item_type: Optional[str] = Field(
None, description="Type of content. Typically 'file', 'dir', 'submodule', or 'symlink'."
)
size: Optional[int] = Field(None, description="Size of the content (in bytes).")
html_url: Optional[str] = Field(
None, description="HTML URL for viewing this content on GitHub."
)
download_url: Optional[str] = Field(None, description="Direct download URL if applicable.")
content: Optional[str] = Field(
None,
description="File content (base64-encoded) if retrieved via 'mediaType=raw' or similar.",
)
encoding: Optional[str] = Field(
None, description="Indicates the encoding of the content (e.g., 'base64')."
)

0 comments on commit 3314a0a

Please sign in to comment.