refactor: add separation of concerns in fetcher and platform

This commit is contained in:
Alexander Navarro 2024-11-30 18:14:04 -03:00
parent 96af51ee68
commit 4094f71a7d
5 changed files with 227 additions and 145 deletions

View file

@ -89,16 +89,12 @@ func (collection *Collection) IsDefault() bool {
type NodeFetcher = func(metadata []byte, pagination Pagination) ([]*Node, Pagination, error)
func (collection *Collection) FetchNodes(fetcher NodeFetcher, start_pagination Pagination) error {
nodes, pagination, err := fetcher(collection.GetMetadata(), start_pagination)
nodes, _, err := fetcher(collection.GetMetadata(), start_pagination)
if err != nil {
return err
}
collection.childs = slices.Concat(collection.childs, nodes)
if pagination.HasMore {
return collection.FetchNodes(fetcher, pagination)
}
err = BulkCreateNode(collection._conn, collection.childs)
if err != nil {
return err

View file

@ -1,45 +1,62 @@
package synchronizator
import "fmt"
import (
"context"
"fmt"
"slices"
)
// StartPagination is the initial pagination configuration.
// The initial pagination configuration.
var StartPagination = Pagination{
Total: 0,
Pages: 0,
HasMore: false,
Limit: 10,
Offset: 0,
Total: 0,
Pages: 0,
Limit: 10,
Offset: 0,
}
// Represents the response from a fetch operation.
// It includes pagination information and the actual response data.
type FetchResponse[T any] struct {
Pagination
Response T
Response T // The actual response data.
}
// Pagination represents the pagination information for fetching data.
// Represents the pagination information for fetching data.
type Pagination struct {
Total uint64 // Total number of items.
Pages uint64
HasMore bool // Indicates if there are more items to fetch.
Limit uint64 // Number of items to fetch per request.
Offset uint64 // Offset for the next set of items to fetch.
Total uint64 // Total number of items.
Pages uint64 // Total number of pages.
Limit uint64 // Number of items to fetch per request.
Offset uint64 // Offset for the next set of items to fetch.
}
// Calculates the number of pages based on the total items, offset, and limit.
// It returns the calculated number of pages and an error if any.
//
// Parameters:
// - pagination: The pagination settings.
// - offset: The offset value.
//
// Returns:
// - uint64: The calculated number of pages.
// - error: The error if any occurred.
func calculatePages(pagination *Pagination, offset uint64) (uint64, error) {
if pagination.Limit == 0 {
return 0, fmt.Errorf("division by zero")
return 0, fmt.Errorf("The limit cannot be 0 to calculate the pages")
}
dividend := pagination.Total - offset
divisor := pagination.Limit
result := dividend / divisor
if dividend%divisor != 0 {
result++
}
result := (pagination.Total - offset) / pagination.Limit
return result, nil
}
// Calculates the page number based on the offset and pagination settings.
// It returns the calculated page number and an error if any.
//
// Parameters:
// - pagination: The pagination settings.
//
// Returns:
// - uint64: The calculated page number.
// - error: The error if any occurred.
func getPageByOffset(pagination *Pagination) (uint64, error) {
if pagination.Pages == 0 {
return 0, fmt.Errorf("division by zero")
@ -51,3 +68,135 @@ func getPageByOffset(pagination *Pagination) (uint64, error) {
return result, nil
}
// Fetches data with pagination support.
// It returns a slice of fetched data and an error if any.
//
// The pagination is gonna try to guess the total amount of pages if not
// provided by executing a first fetch and then calculating with the
// pagination.Total value returned by the fetcher function.
//
// Parameters:
// - ctx: The context to control cancellation.
// - pool_config: The configuration for the worker pool.
// - fetcher: The fetcher function to execute the work.
// - start_pagination: The initial pagination settings.
//
// Returns:
// - []T: A slice of fetched data.
// - error: The error if any occurred.
func fetchWithPagination[T any](
ctx context.Context,
pool_config *WorkConfig,
fetcher Work[Pagination, FetchResponse[[]T]],
start_pagination Pagination,
) ([]T, error) {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
tasks := make(chan Pagination)
values := make([]T, 0)
results, errors, doneChannel := asyncTaskRunner(
ctx,
tasks,
pool_config,
fetcher,
)
var current_page uint64 = 0
if start_pagination.Pages == 0 {
tasks <- start_pagination
response, _, err := waitForFetchResponse(ctx, results, errors, nil)
if err != nil {
return nil, err
}
values = slices.Concat(values, response.Response)
start_pagination.Total = response.Pagination.Total
pages, err := calculatePages(&response.Pagination, start_pagination.Offset)
if err != nil {
return nil, err
}
start_pagination.Pages = pages
current_page++
}
page_offset, err := getPageByOffset(&start_pagination)
if err != nil {
return nil, err
}
current_page += page_offset
for current_page <= start_pagination.Pages {
fmt.Printf("Total pages: %v, Current page: %v\n", start_pagination.Pages, current_page)
page := start_pagination
page.Offset = current_page * page.Limit
tasks <- page
current_page++
}
close(tasks)
for {
response, isWorkDone, err := waitForFetchResponse(ctx, results, errors, doneChannel)
if err != nil {
return nil, err
}
if isWorkDone {
break
}
if response == nil {
continue
}
values = slices.Concat(values, response.Response)
}
return values, nil
}
// Waits for a fetch response from the provided channels.
// It returns the fetch response, a boolean indicating if the work is done, and an error if any.
//
// Parameters:
// - ctx: The context to control cancellation.
// - results: A channel to receive fetch responses.
// - errors: A channel to receive errors.
// - done: A channel to signal completion.
//
// Returns:
// - *FetchResponse[[]T]: The fetch response if available.
// - bool: True if the work is done or the operation was cancelled.
// - error: The error if any occurred.
func waitForFetchResponse[T any](
ctx context.Context,
results <-chan FetchResponse[[]T],
errors <-chan error,
done <-chan struct{},
) (*FetchResponse[[]T], bool, error) {
var values *FetchResponse[[]T]
select {
case response, ok := <-results:
if !ok {
return nil, false, nil
}
values = &response
case err, ok := <-errors:
if !ok {
return nil, false, nil
}
return nil, true, err
case <-ctx.Done():
return nil, true, nil
case <-done:
return nil, true, nil
}
return values, false, nil
}

View file

@ -4,113 +4,41 @@ import (
"context"
"fmt"
"slices"
"time"
)
// Utility struct to represent a collection of nodes, it's a [Node] itself so all
// the node's functionality is available.
// Platform represents a collection of nodes. It embeds a Node, so all the
// node's functionality is available.
type Platform struct {
Node // Underlaying node info
Node // Underlying node info
Collections []*Collection // Child nodes
}
// Is a type alias for FetchResponse containing a slice of Collection pointers.
type FetchCollectionResponse = FetchResponse[[]*Collection]
// Fetches collections using the provided fetcher and pagination settings.
// It updates the platform's collections and creates relationships between the platform and the collections.
//
// Parameters:
// - ctx: The context to control cancellation.
// - fetcher: The fetcher function to execute the work.
// - start_pagination: The initial pagination settings.
// - pool_config: The configuration for the worker pool.
//
// Returns:
// - error: The error if any occurred.
func (platform *Platform) FetchCollections(
ctx context.Context,
fetcher Work[Pagination, FetchCollectionResponse],
start_pagination Pagination,
startPagination Pagination,
poolConfig *WorkConfig,
) error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
config := &WorkConfig{
amount_of_workers: 5,
max_retries: 2,
base_retry_time: time.Second,
rate_limit: NewRateLimiter(5, time.Minute),
timeout: time.Second * 2,
}
tasks := make(chan Pagination)
results, errors, done := asyncTaskRunner(
ctx,
tasks,
config,
fetcher,
)
var current_page uint64 = 0
if start_pagination.Pages == 0 {
// do the first fetch
tasks <- start_pagination
select {
case response, ok := <-results:
if !ok {
break
}
platform.Collections = slices.Concat(platform.Collections, response.Response)
pages, err := calculatePages(&response.Pagination, start_pagination.Offset)
if err != nil {
return err
}
start_pagination.Pages = pages
start_pagination.Total = response.Pagination.Total
current_page++
case error, ok := <-errors:
if !ok {
return fmt.Errorf("Could not do first fetch to calculate pages: %v\n", error)
}
case <-ctx.Done():
break
case <-done:
break
}
}
page_offset, err := getPageByOffset(&start_pagination)
values, err := fetchWithPagination(ctx, poolConfig, fetcher, startPagination)
if err != nil {
return err
}
current_page += page_offset
fmt.Printf("Total pages: %v, Current page: %v\n", start_pagination.Pages, current_page)
for current_page <= start_pagination.Pages {
page := start_pagination
page.Offset = current_page * page.Limit
tasks <- page
current_page++
}
close(tasks)
loop:
for {
select {
case response, ok := <-results:
if !ok {
continue
}
platform.Collections = slices.Concat(platform.Collections, response.Response)
case error, ok := <-errors:
if !ok {
continue
}
fmt.Printf("There was an error: %v\n", error)
case <-ctx.Done():
break loop
case <-done:
break loop
}
}
platform.Collections = slices.Concat(platform.Collections, values)
fmt.Printf("Collections: %v\n", len(platform.Collections))

View file

@ -61,12 +61,12 @@ type Worker[T, S any] struct {
// - rate_limit: default is 10 requests per second
// - timeout: if 0, timeout is disabled
type WorkConfig struct {
tasks_processed sync.WaitGroup // Wait group to synchronize task completion.
amount_of_workers uint8 // Number of workers to spawn.
max_retries uint8 // Maximum number of retries for a task before beign cancelled.
base_retry_time time.Duration // Base factor to wait for before retrying a task.
rate_limit <-chan time.Time // Ticker to limit the amount of request. Is recomended to pass the result of calling NewRateLimiter().
timeout time.Duration // Maximum execution time allowed for a task before beign canceled.
TasksProcessed sync.WaitGroup // Wait group to synchronize task completion.
AmountOfWorkers uint8 // Number of workers to spawn.
MaxRetries uint8 // Maximum number of retries for a task before beign cancelled.
BaseRetryTime time.Duration // Base factor to wait for before retrying a task.
RateLimit <-chan time.Time // Ticker to limit the amount of request. Is recomended to pass the result of calling NewRateLimiter().
Timeout time.Duration // Maximum execution time allowed for a task before beign canceled.
}
// Group the channels used for task processing for easy access between functions.
@ -142,9 +142,9 @@ func handleFailedWorkUnit[T, S any](
config *WorkConfig,
) bool {
// If retries == 0, retries are disabled, return immediately
if config.max_retries == 0 || config.max_retries <= workUnit.attempts {
if config.MaxRetries == 0 || config.MaxRetries <= workUnit.attempts {
channels.tasks_failed <- workUnit.err
config.tasks_processed.Done()
config.TasksProcessed.Done()
return false
}
@ -152,7 +152,7 @@ func handleFailedWorkUnit[T, S any](
workUnit.err = nil
if workUnit.timeout == 0 {
workUnit.timeout = config.base_retry_time
workUnit.timeout = config.BaseRetryTime
} else {
workUnit.timeout *= 2
}
@ -200,7 +200,7 @@ func listenForWorkResults[T, S any](
// Send message to user
channels.tasks_done <- workUnit.result
config.tasks_processed.Done()
config.TasksProcessed.Done()
case <-ctx.Done():
return
}
@ -237,7 +237,7 @@ func workUnitDispatcher[T, S any](
attempts: 0,
}
channels.units_dispatcher <- workUnit
config.tasks_processed.Add(1)
config.TasksProcessed.Add(1)
case <-ctx.Done():
fmt.Println("context done")
@ -258,7 +258,7 @@ func stopProcessingWork[T, S any](
channels *Channels[T, S],
config *WorkConfig,
) {
config.tasks_processed.Wait()
config.TasksProcessed.Wait()
close(channels.units_receiver)
close(channels.units_dispatcher)
@ -289,20 +289,20 @@ func asyncTaskRunner[T, S any](
work Work[T, S],
) (<-chan S, <-chan error, <-chan struct{}) {
// Set default values for WorkConfig if not provided
if config.amount_of_workers == 0 {
config.amount_of_workers = 5
if config.AmountOfWorkers == 0 {
config.AmountOfWorkers = 5
}
if config.base_retry_time == 0 {
config.base_retry_time = 1 * time.Second
if config.BaseRetryTime == 0 {
config.BaseRetryTime = 1 * time.Second
}
if config.rate_limit == nil {
config.rate_limit = NewRateLimiter(10, time.Second)
if config.RateLimit == nil {
config.RateLimit = NewRateLimiter(10, time.Second)
}
// Ensure a clean wait group is ussed
config.tasks_processed = sync.WaitGroup{}
config.TasksProcessed = sync.WaitGroup{}
channel_size := config.amount_of_workers * 3
channel_size := config.AmountOfWorkers * 3
done, finish := context.WithCancel(ctx)
@ -315,13 +315,13 @@ func asyncTaskRunner[T, S any](
}
// create pool of workers
for i := range config.amount_of_workers {
for i := range config.AmountOfWorkers {
worker := &Worker[T, S]{
id: uint8(i),
receptor: channels.units_dispatcher,
transmitter: channels.units_receiver,
rate_limit: config.rate_limit,
timeout: config.timeout,
rate_limit: config.RateLimit,
timeout: config.Timeout,
work: work,
}