Files
MemoryBear/api/app/models/knowledge_model.py
lixiangcheng1 db46c186aa [ADD] Third-party synchronization
1. Third-party website data access - Website synchronization
Build a knowledge base by crawling web page data in batches with a web crawler.
Website synchronization uses crawler technology to automatically capture all pages under the same domain name, starting from a single entry URL. Currently, it supports up to 200 subpages. For compliance and security reasons, only static site crawling is supported; it is mainly used for quickly building knowledge bases from various documentation sites.
2. Feishu Knowledge Base
By configuring Feishu document permissions, a knowledge base can be built from Feishu documents; the documents are not stored a second time.
3. Yuque Knowledge Base
By configuring Yuque document permissions, a knowledge base can be built from Yuque documents; the documents are not stored a second time.
2026-02-06 12:18:40 +08:00

103 lines
4.9 KiB
Python

import datetime
import uuid
import enum
from sqlalchemy import Column, Integer, String, JSON, DateTime, ForeignKey
from sqlalchemy.dialects.postgresql import UUID
from app.db import Base
from sqlalchemy.orm import relationship
class KnowledgeType(enum.StrEnum):
General = "General"
Web = "Web"
ThirdParty = "Third-party"
FOLDER = "Folder"
class ParserType(enum.StrEnum):
NAIVE = "naive"
QA = "qa"
MANUAL = "manual"
TABLE = "table"
PRESENTATION = "presentation"
LAWS = "laws"
PAPER = "paper"
RESUME = "resume"
BOOK = "book"
ONE = "one"
AUDIO = "audio"
EMAIL = "email"
TAG = "tag"
KG = "knowledge_graph"
class PermissionType(enum.StrEnum):
Private = "Private"
Share = "Share"
Memory = "Memory"
def _default_parser_config() -> dict:
    """Build a fresh default ``parser_config`` for a new ``Knowledge`` row.

    A new dict (including a new nested ``graphrag`` dict and a new
    ``entity_types`` list) is created on every call, so rows that fall back
    on the default never share one mutable object.
    """
    return {
        # Web crawler settings.
        "entry_url": "https://ai.redbearai.com",
        "max_pages": 20,
        "delay_seconds": 1.0,
        "timeout_seconds": 10,
        "user_agent": "KnowledgeBaseCrawler/1.0",
        # Yuque / Feishu credentials. The values look like placeholders that
        # are expected to be overwritten -- TODO confirm with the sync code.
        "yuque_user_id": "User ID",
        "yuque_token": "Token",
        "feishu_app_id": "App ID",
        "feishu_app_secret": "App Secret",
        "feishu_folder_token": "Folder Token",
        # Cron expression; in standard cron syntax this is 07:30, Mon-Fri.
        "sync_cron": "30 7 * * 1-5",
        # Document parsing / chunking options.
        "layout_recognize": "DeepDOC",
        "chunk_token_num": 128,
        "delimiter": "\n",
        "auto_keywords": 0,
        "auto_questions": 0,
        "html4excel": False,
        "graphrag": {
            "use_graphrag": False,
            "scene_name": "",
            "entity_types": [
                "organization",
                "person",
                "geo",
                "event",
                "category",
            ],
            "method": "general",
            "resolution": True,
            "community": True,
        },
    }


class Knowledge(Base):
    """ORM model for the ``knowledges`` table.

    Each row describes one knowledge-base entry: its owner and workspace,
    display metadata, default model selections, parser settings, counters
    and status.
    """

    __tablename__ = "knowledges"

    # Identity and ownership.
    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, index=True)
    workspace_id = Column(UUID(as_uuid=True), nullable=False, comment="workspaces.id")
    created_by = Column(
        UUID(as_uuid=True), ForeignKey('users.id'), nullable=False, comment="users.id"
    )
    parent_id = Column(
        UUID(as_uuid=True),
        nullable=True,
        default=None,
        comment="parent folder id when type is Folder",
    )

    # Display metadata.
    name = Column(String, index=True, nullable=False, comment="KB name")
    description = Column(String, comment="KB description")
    avatar = Column(String, comment="avatar url")

    # Classification; stored as plain strings.
    type = Column(String, default="General", comment="Type:General|Web|Third-party|Folder")
    permission_id = Column(
        String, default="Private", comment="permission ID:Private|Share|Memory"
    )

    # Default models. Deleting the referenced model config nulls the column
    # (ondelete="SET NULL") instead of removing the knowledge base.
    embedding_id = Column(
        UUID(as_uuid=True),
        ForeignKey('model_configs.id', ondelete="SET NULL"),
        nullable=True,
        comment="default embedding model ID",
    )
    reranker_id = Column(
        UUID(as_uuid=True),
        ForeignKey('model_configs.id', ondelete="SET NULL"),
        nullable=True,
        comment="default reranker model ID",
    )
    llm_id = Column(
        UUID(as_uuid=True),
        ForeignKey('model_configs.id', ondelete="SET NULL"),
        nullable=True,
        comment="default llm model ID",
    )
    image2text_id = Column(
        UUID(as_uuid=True),
        ForeignKey('model_configs.id', ondelete="SET NULL"),
        nullable=True,
        comment="default image2text model ID",
    )

    # Counters.
    doc_num = Column(Integer, default=0, comment="doc num")
    chunk_num = Column(Integer, default=0, comment="chunk num")

    # Parser selection and settings.
    parser_id = Column(String, index=True, default="naive", comment="default parser ID")
    # The default is a callable rather than a dict literal: a literal would
    # be one shared mutable object, so an in-place change made through one
    # instance could leak into the default used by later inserts.
    parser_config = Column(
        JSON,
        nullable=False,
        default=_default_parser_config,
        comment="default parser config",
    )

    status = Column(
        Integer,
        index=True,
        default=1,
        comment="is it validate(0: disable, 1: enable, 2:Soft-delete)",
    )

    # Timestamps (naive local time from datetime.now). ``onupdate`` refreshes
    # ``updated_at`` on every UPDATE that does not set it explicitly; without
    # it the column would keep its creation-time value.
    created_at = Column(DateTime, default=datetime.datetime.now)
    updated_at = Column(
        DateTime,
        default=datetime.datetime.now,
        onupdate=datetime.datetime.now,
    )

    # Relationships
    # NOTE: each ``backref`` adds an attribute with that name on the target
    # class (User / ModelConfig). The names are kept as-is because other
    # code may rely on them.
    created_user = relationship("User", backref="created_user")
    embedding = relationship(
        "ModelConfig", foreign_keys=[embedding_id], uselist=False, backref="embedding"
    )
    reranker = relationship(
        "ModelConfig", foreign_keys=[reranker_id], uselist=False, backref="reranker"
    )
    llm = relationship("ModelConfig", foreign_keys=[llm_id], uselist=False, backref="llm")
    image2text = relationship(
        "ModelConfig", foreign_keys=[image2text_id], uselist=False, backref="image2text"
    )