package synchronizator

import (
	"fmt"
	"iter"
	"math/rand"
	"sync"
	"time"
)

// Fetcher is the concurrent manager.
// Upon invocation it should create a worker pool of 1 to get the first set of
// results, then, based on the Pagination Total and Limit, distribute the
// workload.
//
// It also needs to handle errors, rate limits, retry strategies, and graceful
// rejections.
//
// It should return the pages not fetched for later retry.
//
// Pagination should include a max-concurrent-connections and rate-limit
// configuration to prevent errors from external sources.
//
// Maybe change the name to pagination or embed it in another struct.
type Fetcher = func(pagination Pagination) ([]*Collection, Pagination, error)

// Pagination describes the paging state of an external source.
type Pagination struct {
	Total   int
	HasMore bool
	Limit   int
	Offset  int
}

// StartPagination is the initial paging state used for the first request.
var StartPagination = Pagination{
	Total:   0,
	HasMore: false,
	Limit:   10,
	Offset:  0,
}

// NewRateLimit returns a channel that releases up to request_per tokens per
// time_scale. It is pre-filled so the first request_per receives do not block,
// then refilled at a steady tick rate.
func NewRateLimit(request_per int, time_scale time.Duration) <-chan time.Time {
	rate_limit := make(chan time.Time, request_per)
	tickrate := time_scale / time.Duration(request_per)

	// Pre-fill the bucket so an initial burst is allowed.
	for range request_per {
		rate_limit <- time.Now()
	}

	// Refill one token per tick.
	go func() {
		for t := range time.Tick(tickrate) {
			rate_limit <- t
		}
	}()

	return rate_limit
}

// WorkUnit is a single task tracked by the pool.
// T represents the argument of the function to run.
// S represents the return value of the function to run.
type WorkUnit[T, S any] struct {
	argument T
	result   S
	err      error
	timeout  time.Duration
	attempts uint8
}

// Work represents a function that processes a value of type T and returns a
// result of type S or an error.
type Work[T, S any] func(value T) (S, error)

// Worker represents a worker that processes tasks of type T and sends results
// of type S.
type Worker[T, S any] struct {
	id         uint8                 // id is the unique identifier of the worker.
	receptor   <-chan WorkUnit[T, S] // receptor is the channel from which the worker receives tasks.
	transmiter chan<- WorkUnit[T, S] // transmiter is the channel to which the worker sends results.
	wg         *sync.WaitGroup       // wg is the wait group to synchronize the completion of tasks.
	work       Work[T, S]            // work is the function that processes tasks.
	rate_limit <-chan time.Time // rate_limit gates each task behind the shared rate limiter.
}

// WorkerManager coordinates a pool of workers: it queues work, tracks how many
// units have been processed, retries failures with exponential backoff, and
// keeps the units that exhausted their retries.
type WorkerManager[T, S any] struct {
	queue_tasks        uint
	processed_tasks    uint
	active_workers     sync.WaitGroup
	is_open_to_work    bool
	max_retries        uint8
	base_retry_time    time.Duration
	failed_units       []*WorkUnit[T, S]
	workers_receptor   chan WorkUnit[T, S]
	workers_transmiter chan WorkUnit[T, S]
}

// AddWork queues a new unit of work. It fails once the manager has been
// closed to new work.
func (manager *WorkerManager[T, S]) AddWork(value T) error {
	if !manager.is_open_to_work {
		return fmt.Errorf("the manager is closed to new work")
	}

	workUnit := WorkUnit[T, S]{
		argument: value,
		timeout:  0,
		attempts: 0,
	}

	manager.workers_receptor <- workUnit
	manager.queue_tasks++

	return nil
}

func (manager *WorkerManager[T, S]) Stop() {
	// Stop receiving new units of work.
	manager.is_open_to_work = false
}

// GetSingleWorkUnit blocks until one result is available and returns it.
func (manager *WorkerManager[T, S]) GetSingleWorkUnit() S {
	workUnit := <-manager.workers_transmiter
	return workUnit.result
}

// handleFailedWorkUnit either records the unit as permanently failed or
// re-queues it after an exponential backoff with jitter. It returns true when
// the unit will be retried.
func (manager *WorkerManager[T, S]) handleFailedWorkUnit(workUnit *WorkUnit[T, S]) bool {
	if manager.max_retries <= workUnit.attempts {
		manager.failed_units = append(manager.failed_units, workUnit)
		manager.processed_tasks++
		return false
	}

	workUnit.attempts++
	if workUnit.timeout == 0 {
		workUnit.timeout = manager.base_retry_time
	} else {
		workUnit.timeout *= 2
	}

	go func() {
		jitter := time.Duration(rand.Int63n(int64(workUnit.timeout)))
		timeout := workUnit.timeout + jitter
		fmt.Printf(
			"Unit failed %v time(s), retrying in: %v\n",
			workUnit.attempts,
			timeout,
		)
		time.Sleep(timeout)
		manager.workers_receptor <- *workUnit
	}()

	return true
}

func (manager *WorkerManager[T, S]) increment_processed_units() {
	manager.processed_tasks++
	fmt.Printf("processed_tasks: %v\n", manager.processed_tasks)
	// Once every queued unit has been accounted for, close the receptor so the
	// workers can drain and exit.
	if manager.processed_tasks >= manager.queue_tasks {
		close(manager.workers_receptor)
	}
}

// handleWorkUnit returns true when the unit succeeded and its result should be
// yielded to the caller.
func (manager *WorkerManager[T, S]) handleWorkUnit(workUnit *WorkUnit[T, S]) bool {
	if workUnit.err != nil {
		can_try_again := manager.handleFailedWorkUnit(workUnit)
		if !can_try_again {
			manager.increment_processed_units()
		}
		return false
	}

	manager.increment_processed_units()
	return true
}

// GetWorkUnit closes the manager to new work and returns an iterator over the
// successful results as they arrive.
func (manager *WorkerManager[T, S]) GetWorkUnit() iter.Seq[S] {
	// Close the done channel when all workers have stopped.
	done_channel := make(chan bool)
	go func() {
		manager.active_workers.Wait()
		close(done_channel)
	}()

	manager.is_open_to_work = false

	return func(yield func(S) bool) {
		for {
			// TODO: handle timeouts
			select {
			case workUnit := <-manager.workers_transmiter:
				if is_successfull := manager.handleWorkUnit(&workUnit); !is_successfull {
					continue
				}
				if !yield(workUnit.result) {
					return
				}
			case <-done_channel:
				close(manager.workers_transmiter)
				return
			}
		}
	}
}

// GetFailedUnits returns the units that exhausted their retries.
func (manager *WorkerManager[T, S]) GetFailedUnits() []*WorkUnit[T, S] {
	return manager.failed_units
}

// spawn_worker consumes work units from the receptor, waits for a rate-limit
// token before each one, runs the work function, and forwards the unit with
// its result (or error) to the transmiter.
func spawn_worker[T, S any](worker *Worker[T, S]) {
	defer worker.wg.Done()

	for workUnit := range worker.receptor {
		// Wait for the rate limit.
		<-worker.rate_limit

		value, err := worker.work(workUnit.argument)
		workUnit.result = value
		workUnit.err = err

		worker.transmiter <- workUnit
	}
}

// createWorkerPool spawns max_workers workers sharing the given rate limit and
// returns the manager that coordinates them.
func createWorkerPool[T, S any](
	max_workers uint8,
	max_retries uint8,
	rate_limit <-chan time.Time,
	work Work[T, S],
) *WorkerManager[T, S] {
	channel_size := max_workers * 3
	manager := &WorkerManager[T, S]{
		max_retries:        max_retries,
		base_retry_time:    time.Second,
		workers_receptor:   make(chan WorkUnit[T, S], channel_size),
		workers_transmiter: make(chan WorkUnit[T, S], channel_size),
	}

	// Create the pool of workers.
	for i := range max_workers {
		worker := &Worker[T, S]{
			id:         uint8(i),
			receptor:   manager.workers_receptor,
			transmiter: manager.workers_transmiter,
			rate_limit: rate_limit,
			wg:         &manager.active_workers,
			work:       work,
		}

		// Register the worker on the wait group before starting it so a later
		// Wait cannot return early.
		manager.active_workers.Add(1)
		go spawn_worker(worker)
	}

	manager.is_open_to_work = true
	return manager
}
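
// exampleUsageSketch is a hypothetical usage sketch, not part of the original
// API: it wires NewRateLimit and createWorkerPool together with a stand-in
// work function (squaring an int in place of a real fetch), queues ten tasks,
// ranges over the successful results, and reports the units that exhausted
// their retries.
func exampleUsageSketch() {
	// Allow 5 requests per second across the whole pool.
	rate_limit := NewRateLimit(5, time.Second)

	// 3 workers, up to 2 retries per unit.
	manager := createWorkerPool(3, 2, rate_limit, func(n int) (int, error) {
		return n * n, nil // stand-in for a real, possibly failing, call
	})

	// Queue the work before asking for results: GetWorkUnit closes the
	// manager to new work.
	for n := range 10 {
		if err := manager.AddWork(n); err != nil {
			fmt.Println(err)
		}
	}

	for result := range manager.GetWorkUnit() {
		fmt.Println(result)
	}

	fmt.Println("failed units:", len(manager.GetFailedUnits()))
}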