Skip to content

Commit

Permalink
go through smoke test of all API (#12)
Browse files Browse the repository at this point in the history
* add field progress msg into docinfo; add file processing procedure

* go through upload, create kb, add doc to kb

* smoke test for all API

* smoke test for all API
  • Loading branch information
KevinHuSh authored Dec 22, 2023
1 parent 72b7b5f commit 1eb186a
Show file tree
Hide file tree
Showing 27 changed files with 915 additions and 275 deletions.
2 changes: 1 addition & 1 deletion docker/.env
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ ES_PORT=9200
KIBANA_PORT=6601

# Increase or decrease based on the available host memory (in bytes)
MEM_LIMIT=1073741824
MEM_LIMIT=4073741824

POSTGRES_USER=root
POSTGRES_PASSWORD=infiniflow_docgpt
Expand Down
18 changes: 18 additions & 0 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,22 @@ services:
- docgpt
restart: always

# MinIO service, attached to the shared `docgpt` network and restarted automatically.
minio:
image: quay.io/minio/minio:RELEASE.2023-12-20T01-00-02Z
container_name: docgpt-minio
# Run the MinIO server with /data as its storage path and the console bound to port 9001.
command: server --console-address ":9001" /data
ports:
# Host:container port mappings. 9000 is the port python/conf/sys.cnf uses for minio_host;
# 9001 matches the --console-address given in `command`.
- 9000:9000
- 9001:9001
environment:
# Root credentials are substituted from MINIO_USER / MINIO_PASSWORD.
# NOTE(review): neither variable appears in the docker/.env lines visible in this diff —
# confirm they are defined, and that they agree with minio_usr / minio_pwd in sys.cnf.
- MINIO_ROOT_USER=${MINIO_USER}
- MINIO_ROOT_PASSWORD=${MINIO_PASSWORD}
volumes:
# Named volume `minio_data` (declared under the top-level `volumes:` section) mounted at /data.
- minio_data:/data
networks:
- docgpt
restart: always


volumes:
esdata01:
Expand All @@ -62,6 +78,8 @@ volumes:
driver: local
pg_data:
driver: local
minio_data:
driver: local

networks:
docgpt:
Expand Down
50 changes: 29 additions & 21 deletions migration/src/m20220101_000001_create_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,14 @@ impl MigrationTrait for Migration {
)
.col(ColumnDef::new(UserInfo::Email).string().not_null())
.col(ColumnDef::new(UserInfo::Nickname).string().not_null())
.col(ColumnDef::new(UserInfo::AvatarUrl).string())
.col(ColumnDef::new(UserInfo::ColorSchema).string().default("dark"))
.col(ColumnDef::new(UserInfo::AvatarBase64).string())
.col(ColumnDef::new(UserInfo::ColorScheme).string().default("dark"))
.col(ColumnDef::new(UserInfo::ListStyle).string().default("list"))
.col(ColumnDef::new(UserInfo::Language).string().default("chinese"))
.col(ColumnDef::new(UserInfo::Password).string().not_null())
.col(ColumnDef::new(UserInfo::CreatedAt).date().not_null())
.col(ColumnDef::new(UserInfo::UpdatedAt).date().not_null())
.col(ColumnDef::new(UserInfo::LastLoginAt).timestamp_with_time_zone())
.col(ColumnDef::new(UserInfo::CreatedAt).timestamp_with_time_zone().not_null())
.col(ColumnDef::new(UserInfo::UpdatedAt).timestamp_with_time_zone().not_null())
.col(ColumnDef::new(UserInfo::IsDeleted).boolean().default(false))
.to_owned(),
)
Expand All @@ -49,9 +50,9 @@ impl MigrationTrait for Migration {
.col(ColumnDef::new(TagInfo::Regx).string())
.col(ColumnDef::new(TagInfo::Color).tiny_unsigned().default(1))
.col(ColumnDef::new(TagInfo::Icon).tiny_unsigned().default(1))
.col(ColumnDef::new(TagInfo::Dir).string())
.col(ColumnDef::new(TagInfo::CreatedAt).date().not_null())
.col(ColumnDef::new(TagInfo::UpdatedAt).date().not_null())
.col(ColumnDef::new(TagInfo::FolderId).big_integer())
.col(ColumnDef::new(TagInfo::CreatedAt).timestamp_with_time_zone().not_null())
.col(ColumnDef::new(TagInfo::UpdatedAt).timestamp_with_time_zone().not_null())
.col(ColumnDef::new(TagInfo::IsDeleted).boolean().default(false))
.to_owned(),
)
Expand Down Expand Up @@ -89,6 +90,10 @@ impl MigrationTrait for Migration {
)
.col(ColumnDef::new(Kb2Doc::KbId).big_integer())
.col(ColumnDef::new(Kb2Doc::Did).big_integer())
.col(ColumnDef::new(Kb2Doc::KbProgress).float().default(0))
.col(ColumnDef::new(Kb2Doc::KbProgressMsg).string().default(""))
.col(ColumnDef::new(Kb2Doc::UpdatedAt).timestamp_with_time_zone().not_null())
.col(ColumnDef::new(Kb2Doc::IsDeleted).boolean().default(false))
.to_owned(),
)
.await?;
Expand Down Expand Up @@ -141,8 +146,8 @@ impl MigrationTrait for Migration {
.col(ColumnDef::new(KbInfo::Uid).big_integer().not_null())
.col(ColumnDef::new(KbInfo::KbName).string().not_null())
.col(ColumnDef::new(KbInfo::Icon).tiny_unsigned().default(1))
.col(ColumnDef::new(KbInfo::CreatedAt).date().not_null())
.col(ColumnDef::new(KbInfo::UpdatedAt).date().not_null())
.col(ColumnDef::new(KbInfo::CreatedAt).timestamp_with_time_zone().not_null())
.col(ColumnDef::new(KbInfo::UpdatedAt).timestamp_with_time_zone().not_null())
.col(ColumnDef::new(KbInfo::IsDeleted).boolean().default(false))
.to_owned(),
)
Expand All @@ -162,10 +167,8 @@ impl MigrationTrait for Migration {
.col(ColumnDef::new(DocInfo::Location).string().not_null())
.col(ColumnDef::new(DocInfo::Size).big_integer().not_null())
.col(ColumnDef::new(DocInfo::Type).string().not_null()).comment("doc|folder")
.col(ColumnDef::new(DocInfo::KbProgress).float().default(0))
.col(ColumnDef::new(DocInfo::KbProgressMsg).string().default(""))
.col(ColumnDef::new(DocInfo::CreatedAt).date().not_null())
.col(ColumnDef::new(DocInfo::UpdatedAt).date().not_null())
.col(ColumnDef::new(DocInfo::CreatedAt).timestamp_with_time_zone().not_null())
.col(ColumnDef::new(DocInfo::UpdatedAt).timestamp_with_time_zone().not_null())
.col(ColumnDef::new(DocInfo::IsDeleted).boolean().default(false))
.to_owned(),
)
Expand All @@ -182,10 +185,11 @@ impl MigrationTrait for Migration {
.auto_increment()
.primary_key())
.col(ColumnDef::new(DialogInfo::Uid).big_integer().not_null())
.col(ColumnDef::new(DialogInfo::KbId).big_integer().not_null())
.col(ColumnDef::new(DialogInfo::DialogName).string().not_null())
.col(ColumnDef::new(DialogInfo::History).string().comment("json"))
.col(ColumnDef::new(DialogInfo::CreatedAt).date().not_null())
.col(ColumnDef::new(DialogInfo::UpdatedAt).date().not_null())
.col(ColumnDef::new(DialogInfo::CreatedAt).timestamp_with_time_zone().not_null())
.col(ColumnDef::new(DialogInfo::UpdatedAt).timestamp_with_time_zone().not_null())
.col(ColumnDef::new(DialogInfo::IsDeleted).boolean().default(false))
.to_owned(),
)
Expand Down Expand Up @@ -241,11 +245,12 @@ enum UserInfo {
Uid,
Email,
Nickname,
AvatarUrl,
ColorSchema,
AvatarBase64,
ColorScheme,
ListStyle,
Language,
Password,
LastLoginAt,
CreatedAt,
UpdatedAt,
IsDeleted,
Expand All @@ -260,7 +265,7 @@ enum TagInfo {
Regx,
Color,
Icon,
Dir,
FolderId,
CreatedAt,
UpdatedAt,
IsDeleted,
Expand All @@ -280,6 +285,10 @@ enum Kb2Doc {
Id,
KbId,
Did,
KbProgress,
KbProgressMsg,
UpdatedAt,
IsDeleted,
}

#[derive(DeriveIden)]
Expand Down Expand Up @@ -319,8 +328,6 @@ enum DocInfo {
Location,
Size,
Type,
KbProgress,
KbProgressMsg,
CreatedAt,
UpdatedAt,
IsDeleted,
Expand All @@ -329,8 +336,9 @@ enum DocInfo {
#[derive(DeriveIden)]
enum DialogInfo {
Table,
DialogId,
Uid,
KbId,
DialogId,
DialogName,
History,
CreatedAt,
Expand Down
5 changes: 4 additions & 1 deletion python/conf/sys.cnf
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
[infiniflow]
es=127.0.0.1:9200
es=http://127.0.0.1:9200
pgdb_usr=root
pgdb_pwd=infiniflow_docgpt
pgdb_host=127.0.0.1
pgdb_port=5455
minio_host=127.0.0.1:9000
minio_usr=infiniflow
minio_pwd=infiniflow_docgpt

14 changes: 13 additions & 1 deletion python/nlp/huchunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import copy
import base64
import magic
from dataclasses import dataclass
from typing import List
import numpy as np
Expand Down Expand Up @@ -373,6 +374,7 @@ def __call__(self, fnm):
from pptx import Presentation
ppt = Presentation(fnm)
flds = self.Fields()
flds.text_chunks = []
for slide in ppt.slides:
for shape in slide.shapes:
if hasattr(shape, "text"):
Expand All @@ -391,11 +393,21 @@ class Fields:
def __init__(self):
super().__init__()

@staticmethod
def is_binary_file(file_path):
    """Tell whether `file_path` should be treated as a binary (non-text) file.

    The file's MIME type is obtained from `magic.Magic(mime=True)`; the file
    counts as binary whenever that MIME string does not contain 'text'.

    Returns:
        False if 'text' occurs in the detected MIME type, True otherwise.
    """
    detector = magic.Magic(mime=True)
    detected_mime = detector.from_file(file_path)
    return 'text' not in detected_mime

# Build a Fields object for the file `fnm`: when is_binary_file() reports the file as
# binary, the freshly created Fields is returned untouched; otherwise the file is read
# as text, split by naive_text_chunk(), and the chunks are stored on the Fields object.
def __call__(self, fnm):
flds = self.Fields()
# Early exit: binary files are never opened in text mode.
if self.is_binary_file(fnm):return flds
with open(fnm, "r") as f:
txt = f.read()
# NOTE(review): this listing is a diff rendered without +/- markers. The next two lines
# both assign flds.text_chunks, so the first is presumably the pre-change version that
# the commit removed — confirm against the repository before relying on either.
flds.text_chunks = self.naive_text_chunk(txt)
# Store each chunk as a 2-tuple whose second element is None.
flds.text_chunks = [(c, None) for c in self.naive_text_chunk(txt)]
# This path produces no table chunks.
flds.table_chunks = []
return flds

Expand Down
Loading

0 comments on commit 1eb186a

Please sign in to comment.