synchronizator-go/pkg/fetcher.go

package synchronizator

import (
	"fmt"
	"iter"
	"math/rand"
	"sync"
	"time"
)

// Fetcher is the concurrent manager.
// Upon invocation, it should create a worker pool of 1 to get the first set of results,
// then, based on the Pagination Total and Limit, it should distribute the workload.
//
// It also needs to handle errors, rate limits, retry strategies, and graceful rejections.
//
// It should return the pages that were not fetched so they can be retried later.
//
// Pagination should include a max-concurrent-connections and rate-limit
// configuration to prevent errors from external sources.
//
// Maybe change the name to pagination or embed it in another struct.

// Fetcher is the signature of a function that receives a Pagination and
// returns a slice of *Collection, a Pagination and an error.
// NOTE(review): presumably the returned Pagination describes the paging state
// after the fetch — confirm against the Fetcher implementations.
type Fetcher = func(pagination Pagination) ([]*Collection, Pagination, error)

// Pagination groups the paging values passed to and returned by a Fetcher.
// NOTE(review): the field meanings below are inferred from their names —
// confirm against the Fetcher implementations.
type Pagination struct {
	Total   int  // presumably the total number of items reported by the source
	HasMore bool // presumably whether more pages remain to be fetched
	Limit   int  // presumably the page size
	Offset  int  // presumably the position of the first item of the page
}

// StartPagination is the Pagination value with Limit set to 10 and every
// other field left at its zero value (Total 0, HasMore false, Offset 0).
var StartPagination = Pagination{Limit: 10}

// NewRateLimit returns a channel that hands out "tokens" at a rate of
// request_per tokens every time_scale. Callers receive one value from the
// channel before each rate-limited operation.
//
// The channel is buffered with request_per slots and starts full, so an
// initial burst of request_per operations goes through without waiting. After
// that a background goroutine adds one token every time_scale/request_per;
// ticks that arrive while the buffer is full are simply delayed or dropped.
//
// Degenerate arguments are clamped instead of panicking or stalling:
//   - request_per < 1 is treated as 1 (0 used to divide by zero, negative
//     values used to panic when creating the channel);
//   - a tick interval that rounds down to zero or is negative is raised to one
//     nanosecond (time.Tick returns a nil channel for such intervals, which
//     used to leave the limiter empty forever after the initial burst).
//
// The refill goroutine runs for the lifetime of the program; there is no way
// to stop it through the returned receive-only channel.
func NewRateLimit(request_per int, time_scale time.Duration) <-chan time.Time {
	if request_per < 1 {
		request_per = 1
	}

	tickrate := time_scale / time.Duration(request_per)
	if tickrate <= 0 {
		tickrate = time.Nanosecond
	}

	rate_limit := make(chan time.Time, request_per)

	// Pre-fill the bucket so the first request_per callers do not wait.
	for i := 0; i < request_per; i++ {
		rate_limit <- time.Now()
	}

	go func() {
		for t := range time.Tick(tickrate) {
			rate_limit <- t
		}
	}()

	return rate_limit
}

// WorkUnit carries a single task through the worker pool together with its
// outcome and its retry bookkeeping.
//
// T represents the argument of the function to run.
// S represents the return value of the function to run.
type WorkUnit[T, S any] struct {
	argument T             // value passed to the work function
	result   S             // value returned by the work function
	err      error         // error returned by the work function
	timeout  time.Duration // retry delay base: 0 at first, then the manager's base retry time, doubled on each further retry
	attempts uint8         // number of retries scheduled so far for this unit
}

// Work represents a function that processes a value of type T and returns a
// result of type S or an error.
type Work[T, S any] func(value T) (S, error)

// Worker represents a worker that processes tasks whose argument is of type T
// and sends back results of type S.
type Worker[T, S any] struct {
	id         uint8                 // id is the identifier given to the worker when the pool is created.
	receptor   <-chan WorkUnit[T, S] // receptor is the channel from which the worker receives tasks.
	transmiter chan<- WorkUnit[T, S] // transmiter is the channel to which the worker sends results.
	wg         *sync.WaitGroup       // wg is the wait group the worker marks as Done when it exits.
	work       Work[T, S]            // work is the function that processes tasks.
	rate_limit <-chan time.Time      // rate_limit is the channel the worker receives from before running each task.
}

// WorkerManager owns the channels shared with a pool of workers and keeps the
// bookkeeping used to retry failed units and to detect when every queued unit
// has been processed.
type WorkerManager[T, S any] struct {
	queue_tasks        uint                // number of units accepted by AddWork
	processed_tasks    uint                // number of units counted as finished, either succeeded or given up on
	active_workers     sync.WaitGroup      // counts the worker goroutines started for this manager
	is_open_to_work    bool                // when false, AddWork rejects new units
	max_retries        uint8               // a failed unit is retried until its attempts reach this value
	base_retry_time    time.Duration       // delay base used for the first retry of a unit
	failed_units       []*WorkUnit[T, S]   // units that failed and will not be retried again
	workers_receptor   chan WorkUnit[T, S] // channel the workers read tasks from
	workers_transmiter chan WorkUnit[T, S] // channel the workers write results to
}

// AddWork queues value as a new unit of work for the workers.
//
// It returns an error, without queuing anything, when the manager is no
// longer open to work. The call blocks while the workers' input channel is
// full.
func (manager *WorkerManager[T, S]) AddWork(value T) error {
	if !manager.is_open_to_work {
		return fmt.Errorf("The manager is closed to add more work.")
	}

	// Count the unit before handing it to the workers. Once it is on the
	// channel a worker may finish it and the consumer may compare
	// processed_tasks against queue_tasks; incrementing afterwards would let
	// that comparison see a stale total and close the channel too early.
	manager.queue_tasks++

	// timeout and attempts start at their zero values.
	manager.workers_receptor <- WorkUnit[T, S]{argument: value}

	return nil
}

// Stop makes the manager reject any further AddWork call.
// It only clears the is_open_to_work flag; channels and workers are left
// untouched.
func (manager *WorkerManager[T, S]) Stop() {
	// Stop receiving new units of work
	manager.is_open_to_work = false
}

// GetSingleWorkUnit blocks until one unit is available on the workers' output
// channel and returns the result stored in that unit. The unit's error is not
// inspected and the manager's counters are not updated.
func (manager *WorkerManager[T, S]) GetSingleWorkUnit() S {
	return (<-manager.workers_transmiter).result
}

// handleFailedWorkUnit decides what happens to a unit whose work function
// returned an error.
//
// When the unit has already been retried max_retries times it is appended to
// failed_units and false is returned. Otherwise the unit's attempt counter is
// incremented, its back-off is set (base_retry_time on the first retry,
// doubled on every later one), a goroutine is started that sleeps for the
// back-off plus a random jitter and then puts the unit back on the workers'
// input channel, and true is returned.
func (manager *WorkerManager[T, S]) handleFailedWorkUnit(workUnit *WorkUnit[T, S]) bool {
	if workUnit.attempts >= manager.max_retries {
		// processed_tasks is deliberately not touched here: the caller,
		// handleWorkUnit, counts the unit through increment_processed_units.
		// Counting it here as well made every permanently failed unit count
		// twice, which closed the workers' input channel too early.
		manager.failed_units = append(manager.failed_units, workUnit)
		return false
	}

	workUnit.attempts++

	if workUnit.timeout == 0 {
		workUnit.timeout = manager.base_retry_time
	} else {
		workUnit.timeout *= 2
	}

	// Snapshot the unit so the goroutine does not depend on the caller's copy.
	retry := *workUnit

	go func() {
		timeout := retry.timeout
		// rand.Int63n panics for n <= 0 (zero base_retry_time or an
		// overflowed back-off), so jitter is only added to positive delays.
		if timeout > 0 {
			timeout += time.Duration(rand.Int63n(int64(timeout)))
		}
		fmt.Printf(
			"Unit failed for %v time, retrying in: %v\n",
			retry.attempts,
			timeout,
		)
		time.Sleep(timeout)
		manager.workers_receptor <- retry
	}()

	return true
}

// increment_processed_units counts one more finished unit, prints the running
// total, and closes the workers' input channel once the total has reached the
// number of queued units.
func (manager *WorkerManager[T, S]) increment_processed_units() {
	manager.processed_tasks += 1
	fmt.Printf("processed_tasks: %v\n", manager.processed_tasks)

	// Units are still pending: keep the input channel open.
	if manager.processed_tasks < manager.queue_tasks {
		return
	}

	close(manager.workers_receptor)
}

// handleWorkUnit does the bookkeeping for a unit returned by a worker and
// reports whether the unit succeeded.
//
// A unit with an error is passed to handleFailedWorkUnit; if a retry was
// scheduled nothing is counted. In every other case (success, or a failure
// that will not be retried) the unit is counted as processed.
func (manager *WorkerManager[T, S]) handleWorkUnit(workUnit *WorkUnit[T, S]) bool {
	failed := workUnit.err != nil

	// handleFailedWorkUnit is only consulted for failed units.
	if failed && manager.handleFailedWorkUnit(workUnit) {
		return false
	}

	manager.increment_processed_units()

	return !failed
}

// GetWorkUnit closes the manager to new work and returns an iterator over the
// results of the units that succeeded.
//
// Calling GetWorkUnit immediately starts a goroutine that waits for all
// workers to exit and marks the manager as closed to work. The returned
// iterator then reads units from the workers' output channel: failed units
// are handled by handleWorkUnit and skipped, successful ones are yielded.
// Iteration ends when the consumer stops it, or when all workers have exited,
// in which case the output channel is closed.
func (manager *WorkerManager[T, S]) GetWorkUnit() iter.Seq[S] {
	// workersDone is closed once every worker goroutine has returned.
	workersDone := make(chan bool)

	go func() {
		manager.active_workers.Wait()
		close(workersDone)
	}()

	manager.is_open_to_work = false

	results := func(yield func(S) bool) {
		for {
			// TODO: handle timeouts
			select {
			case <-workersDone:
				close(manager.workers_transmiter)
				return
			case unit := <-manager.workers_transmiter:
				// Failed units are retried or recorded, never yielded.
				if !manager.handleWorkUnit(&unit) {
					continue
				}

				if keepGoing := yield(unit.result); !keepGoing {
					return
				}
			}
		}
	}

	return results
}

// GetFailedUnits returns the manager's slice of units that failed and were
// not retried any further. The slice is returned as stored, not copied.
func (manager *WorkerManager[T, S]) GetFailedUnits() []*WorkUnit[T, S] {
	return manager.failed_units
}

// spawn_worker runs the processing loop of a single worker; it is meant to be
// started in its own goroutine.
//
// For every unit read from the worker's receptor channel it waits for a
// rate-limit token, runs the work function on the unit's argument, stores the
// returned value and error in the unit, and sends the unit on the transmiter
// channel. The loop ends when the receptor channel is closed, at which point
// the worker's wait group is marked as Done.
func spawn_worker[T, S any](worker *Worker[T, S]) {
	defer worker.wg.Done()

	for workUnit := range worker.receptor {
		// Wait for rate-limit. Receiving from a nil channel blocks forever,
		// so a worker created without a limiter runs unthrottled instead of
		// hanging on its first unit.
		if worker.rate_limit != nil {
			<-worker.rate_limit
		}

		workUnit.result, workUnit.err = worker.work(workUnit.argument)

		worker.transmiter <- workUnit
	}
}

// createWorkerPool builds a WorkerManager and starts max_workers worker
// goroutines that run work on every queued unit.
//
// Parameters:
//   - max_workers: number of worker goroutines to start.
//   - max_retries: how many times a failed unit is retried before it is
//     recorded as failed.
//   - rate_limit: channel every worker receives from before running a unit.
//   - work: function applied to each unit's argument.
//
// Both manager channels are buffered with three slots per worker and the base
// retry time is one second. The returned manager is open to work.
func createWorkerPool[T, S any](
	max_workers uint8,
	max_retries uint8,
	rate_limit <-chan time.Time,
	work Work[T, S],
) *WorkerManager[T, S] {
	// Widen before multiplying: in uint8 arithmetic max_workers*3 wraps
	// around for 86 workers or more and yields a far too small buffer.
	channel_size := int(max_workers) * 3

	manager := &WorkerManager[T, S]{
		max_retries:        max_retries,
		base_retry_time:    time.Second,
		workers_receptor:   make(chan WorkUnit[T, S], channel_size),
		workers_transmiter: make(chan WorkUnit[T, S], channel_size),
	}

	// Register all workers with the wait group before any goroutine starts,
	// so a worker can never call Done ahead of its matching Add.
	manager.active_workers.Add(int(max_workers))

	// create pool of workers
	for i := range max_workers {
		worker := &Worker[T, S]{
			id:         i,
			receptor:   manager.workers_receptor,
			transmiter: manager.workers_transmiter,
			rate_limit: rate_limit,
			wg:         &manager.active_workers,
			work:       work,
		}

		go spawn_worker(worker)
	}

	manager.is_open_to_work = true

	return manager
}