From 28fa3ed3cc61e558ce9b20079f32e0035258eeaf Mon Sep 17 00:00:00 2001 From: aleidk Date: Mon, 2 Dec 2024 15:13:44 -0300 Subject: [PATCH] feat: add simple reconsiliation capabilities --- examples/readeck/main.go | 162 ++++++++++++++++++++++++++++++++++++++ examples/readwise/main.go | 16 ++-- pkg/collection.go | 124 +++++++++++++++++++++++++++++ pkg/platform.go | 101 +++++++++++++++++++++++- pkg/synchronizator.go | 56 ++++++++++++- 5 files changed, 451 insertions(+), 8 deletions(-) create mode 100644 examples/readeck/main.go diff --git a/examples/readeck/main.go b/examples/readeck/main.go new file mode 100644 index 0000000..fd4cc55 --- /dev/null +++ b/examples/readeck/main.go @@ -0,0 +1,162 @@ +package main + +import ( + "database/sql" + "encoding/json" + "fmt" + "time" + + _ "modernc.org/sqlite" + + synchronizator "git.alecodes.page/alecodes/synchronizator/pkg" +) + +type ReadwiseDocument struct { + Id string `json:"id"` + Url string `json:"url"` + Title string `json:"title"` + Location string `json:"location"` + SourceUrl string `json:"source_url"` + Tags map[string]ReadwiseTag `json:"tags"` +} + +type ReadwiseTag struct { + Name string `json:"name"` + Type string `json:"type"` + Created int `json:"created"` +} + +type ReadwiseHighlight struct { + UserBookID int `json:"user_book_id"` + Title string `json:"title"` + Author string `json:"author"` + Source string `json:"source"` + UniqueURL string `json:"unique_url"` + BookTags []HighlightTag `json:"book_tags"` + Category string `json:"category"` + DocumentNote *string `json:"document_note"` + ReadwiseURL string `json:"readwise_url"` + SourceURL string `json:"source_url"` + Highlights []HighlightItem `json:"highlights"` +} + +type HighlightTag struct { + Id int `json:"id"` + Name string `json:"name"` +} + +type HighlightItem struct { + ID int `json:"id"` + Text string `json:"text"` + Location int `json:"location"` + LocationType string `json:"location_type"` + Note string `json:"note"` + Color string `json:"color"` + HighlightedAt string `json:"highlighted_at"` + CreatedAt string `json:"created_at"` + UpdatedAt string `json:"updated_at"` + ExternalID string `json:"external_id"` + EndLocation *int `json:"end_location"` + URL string `json:"url"` + BookID int `json:"book_id"` + Tags []HighlightTag `json:"tags"` + IsFavorite bool `json:"is_favorite"` + IsDiscard bool `json:"is_discard"` + ReadwiseURL string `json:"readwise_url"` +} + +type ReadeckBookmark struct { + Id string `json:"id"` + Url string `json:"url"` + Title string `json:"title"` + Labels []string `json:"labels"` + IsArchived bool `json:"is_archived"` +} + +func readwiseToReadeck(document *ReadwiseDocument) (*synchronizator.Node, error) { + bookmark := &ReadeckBookmark{ + Url: document.SourceUrl, + Title: document.Title, + } + + for _, tag := range document.Tags { + bookmark.Labels = append(bookmark.Labels, tag.Name) + } + + metadata, err := json.Marshal(bookmark) + if err != nil { + return nil, err + } + + node := synchronizator.NewNode(bookmark.Title, "BOOKMARK", metadata, nil) + + return node, nil +} + +func drop_data(conn *sql.DB) error { + sql := ` + DELETE FROM nodes WHERE name = 'READECK'; + ` + _, err := conn.Exec(sql) + + return err +} + +func main() { + start := time.Now() + + defer func() { + elapsed := time.Now().Sub(start) + fmt.Printf("\n\nExecution time took: %s", elapsed) + }() + + connection, err := sql.Open("sqlite", "readwise.sql") + if err != nil { + fmt.Println(err) + + return + } + + defer connection.Close() + + opts := synchronizator.DefaultOptions + // opts.Log_level = synchronizator.DEBUG + + err = drop_data(connection) + if err != nil { + fmt.Println(err) + + return + } + + sync, err := synchronizator.New(connection, opts) + if err != nil { + fmt.Println(err) + + return + } + + readeck, err := sync.NewPlatform("READECK", nil, nil) + if err != nil { + fmt.Println(err) + return + } + + collection, err := readeck.GetDefaultCollection() + if err != nil { + fmt.Println(err) + return + } + + readwiseDocuments, err := sync.GetCollection(3) + if err != nil { + fmt.Println(err) + return + } + + err = synchronizator.ReconciliateCollections(readwiseDocuments, collection, readwiseToReadeck) + if err != nil { + fmt.Println(err) + return + } +} diff --git a/examples/readwise/main.go b/examples/readwise/main.go index 284f0c2..47c2807 100644 --- a/examples/readwise/main.go +++ b/examples/readwise/main.go @@ -24,7 +24,7 @@ type ReadwiseCursor struct { type ReadwiseApiResponse[T, S any] struct { Results []T `json:"results"` - Detail string `json:detail` + Detail string `json:"detail"` Count uint64 `json:"count"` NextPageCursor S `json:"nextPageCursor"` } @@ -40,8 +40,8 @@ type ReadwiseDocument struct { // Author string `json:"author"` // Source string `json:"source"` // Category string `json:"category"` - Location string `json:"location"` - // Tags map[string]string `json:"tags"` + Location string `json:"location"` + Tags map[string]ReadwiseTag `json:"tags"` // SiteName string `json:"site_name"` // CreatedAt string `json:"created_at"` // UpdatedAt string `json:"updated_at"` @@ -53,6 +53,12 @@ type ReadwiseDocument struct { // LastMovedAt string `json:"last_moved_at"` } +type ReadwiseTag struct { + Name string `json:"name"` + Type string `json:"type"` + Created int `json:"created"` +} + type ReadwiseHighlight struct { UserBookID int `json:"user_book_id"` Title string `json:"title"` @@ -68,8 +74,8 @@ type ReadwiseHighlight struct { } type HighlightTag struct { - Id int `json:id` - Name string `json:name` + Id int `json:"id"` + Name string `json:"name"` } type HighlightItem struct { diff --git a/pkg/collection.go b/pkg/collection.go index a7592e5..70345f4 100644 --- a/pkg/collection.go +++ b/pkg/collection.go @@ -2,6 +2,7 @@ package synchronizator import ( "context" + "encoding/json" "fmt" "slices" "strings" @@ -35,6 +36,58 @@ type Collection struct { is_default bool } +func (collection *Collection) loadNodes() error { + sql := ` + WITH RECURSIVE NodeRelationships AS ( + SELECT + *, + relationships._class AS relationship_class, + relationships.metadata AS relationship_metadata + FROM + nodes as src + JOIN relationships ON src.id = relationships.node_from + WHERE src.id = $1 AND relationships._class = 'COLLECTION_HAS_NODE' + ) + SELECT + dst.id, + dst.name, + dst._class, + dst.metadata, + dst.original_data + FROM + NodeRelationships + JOIN nodes as dst ON dst.id = NodeRelationships.node_to + ` + + rows, err := collection._conn.Query(sql, collection.Id) + if err != nil { + return err + } + + defer rows.Close() + + collection._relationships = make([]StandardRelationship, 0) + collection.childs = make([]*Node, 0) + for rows.Next() { + node := &Node{ + _conn: collection._conn, + } + if err := rows.Scan( + &node.Id, + &node.name, + &node._class, + &node.metadata, + &node.originalData, + ); err != nil { + return err + } + + collection.childs = append(collection.childs, node) + } + + return nil +} + // NewCollectionObject creates a new Collection instance without persisting it to the database. // This is useful when you want to prepare a Collection for later storage. func NewCollection(name string, metadata []byte) *Collection { @@ -145,3 +198,74 @@ func (collection *Collection) FetchNodes( return nil } + +type Reconcilation[T any] func(src *T) (*Node, error) + +func ReconciliateCollections[T any](src, dst *Collection, work Reconcilation[T]) error { + newNodes := make([]*Node, 0, len(src.childs)) + updateNodes := make([]*Node, 0, len(src.childs)) + for _, srcNode := range src.childs { + var metadata T + err := json.Unmarshal(srcNode.metadata, &metadata) + if err != nil { + return err + } + + dstNode, err := work(&metadata) + if err != nil { + return err + } + + if dstNode == nil { + return fmt.Errorf("Node pointer is null") + } + + if dstNode.Id == -1 { + newNodes = append(newNodes, dstNode) + } else { + updateNodes = append(updateNodes, dstNode) + } + } + + BulkCreateNode(src._conn, newNodes) + // BulkUpdateNode(src._conn, updateNodes) + + relationships := make([]*Relationship, 0, len(newNodes)+len(updateNodes)) + + appendRelationship := func(item *Node) error { + relation := &Relationship{ + _class: "COLLECTION_HAS_NODE", + From: dst.Id, + To: item.Id, + } + + err := dst.AddRelationship(relation) + if err != nil { + return err + } + relationships = append(relationships, relation) + + return nil + } + + for _, item := range newNodes { + err := appendRelationship(item) + if err != nil { + return err + } + } + + for _, item := range updateNodes { + err := appendRelationship(item) + if err != nil { + return err + } + } + + err := BulkCreateRelationships(dst._conn, relationships) + if err != nil { + return err + } + + return nil +} diff --git a/pkg/platform.go b/pkg/platform.go index 9348343..f865fb0 100644 --- a/pkg/platform.go +++ b/pkg/platform.go @@ -2,10 +2,33 @@ package synchronizator import ( "context" + sql "database/sql" "fmt" "slices" ) +type platform_has_collection struct { + Relationship +} + +func (col_has_node *platform_has_collection) GetClass() string { + return "PLATFORM_HAS_COLLECTION" +} + +func (col_has_node *platform_has_collection) GetMetadata() []byte { + return nil +} + +func (col_has_node *platform_has_collection) FromRelationship( + _class string, + metadata []byte, +) error { + if _class != "PLATFORM_HAS_COLLECTION" { + return fmt.Errorf("invalid class %s", _class) + } + return nil +} + // Platform represents a collection of nodes. It embeds a Node, so all the // node's functionality is available. type Platform struct { @@ -17,16 +40,90 @@ func (platform *Platform) AddCollection( name string, metadata, originalData []byte, ) (*Collection, error) { - collection, err := platform._conn.NewCollection(name, metadata, originalData) + var collection *Collection + + err := platform._conn.withTx(func(tx *sql.Tx) error { + node, err := platform._conn.newCollectionwithTx(tx, name, metadata, originalData) + if err != nil { + return err + } + + _, err = platform._conn.addRelationwithTx( + tx, + platform.Id, + &platform_has_collection{}, + node.Id, + ) + if err != nil { + return err + } + + collection = node + + return nil + }) if err != nil { return nil, err } platform.Collections = append(platform.Collections, collection) - return collection, nil } +func (platform *Platform) loadCollections() error { + sql := ` + WITH RECURSIVE NodeRelationships AS ( + SELECT + *, + relationships._class AS relationship_class, + relationships.metadata AS relationship_metadata + FROM + nodes as src + JOIN relationships ON src.id = relationships.node_from + WHERE src.id = $1 AND relationships._class = 'PLATFORM_HAS_COLLECTION' + ) + SELECT + dst.id, + dst.name, + dst._class, + dst.metadata, + dst.original_data + FROM + NodeRelationships + JOIN nodes as dst ON dst.id = NodeRelationships.node_to + ` + + rows, err := platform._conn.Query(sql, platform.Id) + if err != nil { + return err + } + + defer rows.Close() + + platform._relationships = make([]StandardRelationship, 0) + platform.Collections = make([]*Collection, 0) + for rows.Next() { + collection := &Collection{ + Node: Node{ + _conn: platform._conn, + }, + } + if err := rows.Scan( + &collection.Id, + &collection.name, + &collection._class, + &collection.metadata, + &collection.originalData, + ); err != nil { + return err + } + + platform.Collections = append(platform.Collections, collection) + } + + return nil +} + func (platform *Platform) GetDefaultCollection() (*Collection, error) { for _, collection := range platform.Collections { if collection.IsDefault() { diff --git a/pkg/synchronizator.go b/pkg/synchronizator.go index 966262b..03ba0b0 100644 --- a/pkg/synchronizator.go +++ b/pkg/synchronizator.go @@ -195,7 +195,7 @@ func (conn *DB) NewPlatform(name string, metadata []byte, originalData []byte) ( Collections: []*Collection{collection}, } - _, err = conn.addRelationwithTx(tx, platform.Id, &collection_has_node{}, collection.Id) + _, err = conn.addRelationwithTx(tx, platform.Id, &platform_has_collection{}, collection.Id) if err != nil { return err } @@ -340,6 +340,60 @@ func (conn *DB) GetNode(id int64) (*Node, error) { return &node, nil } +func (conn *DB) GetPlatform(id int64) (*Platform, error) { + node := Node{ + Id: id, + _conn: conn, + } + sql := "SELECT _class, name, metadata, original_data FROM nodes WHERE id = $1 AND _class = 'PLATFORM';" + conn.log(DEBUG, sql) + + err := conn.Connection.QueryRow(sql, id). + Scan(&node._class, &node.name, &node.metadata, &node.originalData) + if err != nil { + conn.log(DEBUG, err) + return nil, fmt.Errorf("No row matching id = %v", id) + } + + platform := &Platform{ + Node: node, + } + + err = platform.loadCollections() + if err != nil { + return nil, err + } + + return platform, nil +} + +func (conn *DB) GetCollection(id int64) (*Collection, error) { + node := Node{ + Id: id, + _conn: conn, + } + sql := "SELECT _class, name, metadata, original_data FROM nodes WHERE id = $1 AND _class = 'COLLECTION';" + conn.log(DEBUG, sql) + + err := conn.Connection.QueryRow(sql, id). + Scan(&node._class, &node.name, &node.metadata, &node.originalData) + if err != nil { + conn.log(DEBUG, err) + return nil, fmt.Errorf("No row matching id = %v", id) + } + + collection := &Collection{ + Node: node, + } + + err = collection.loadNodes() + if err != nil { + return nil, err + } + + return collection, nil +} + // Deletes a node with the provided id func (conn *DB) DeleteNode(id int64) error { tx, err := conn.Connection.Begin()