package synchronizator

import (
	"context"
	"fmt"
	"math/rand"
	"sync"
	"time"
)

// Fetcher retrieves one page of collections: it receives the Pagination to
// request and returns the collections, a Pagination value, and any error.
// NOTE(review): the returned Pagination presumably describes the next page or
// the refreshed totals; confirm against the implementations.
//
// Design notes (not implemented by this type):
//   - The concurrent manager should, upon invocation, create a worker pool of 1
//     to get the first set of results, then distribute the remaining workload
//     based on the Pagination Total and Limit.
//   - It also needs to handle errors, rate limits, retry strategies, and
//     graceful rejections.
//   - It should return the pages not fetched for later retry.
//   - Pagination should include a max-concurrent-connections and rate-limit
//     configuration to prevent errors from external sources.
//   - Maybe change the name to pagination or embed it in another struct.
type Fetcher = func(pagination Pagination) ([]*Collection, Pagination, error)

// Pagination carries the paging state exchanged with a Fetcher.
// NOTE(review): field meanings below are inferred from their names; confirm
// against the Fetcher implementations.
type Pagination struct {
	Total   int  // presumably the total number of items available
	HasMore bool // presumably true while more pages remain
	Limit   int  // presumably the page size
	Offset  int  // presumably the index of the first item of the page
}

// StartPagination is the Pagination used for a first request: offset 0 with a
// limit of 10.
var StartPagination = Pagination{
	Total:   0,
	HasMore: false,
	Limit:   10,
	Offset:  0,
}

// NewRateLimit returns a channel of tokens that allows request_per operations
// per time_scale. The channel starts full, so a burst of request_per receives
// succeeds immediately; afterwards one token is added every
// time_scale/request_per while there is free capacity.
//
// A request_per below 1 is treated as 1 (it would otherwise divide by zero or
// request a negative buffer). When the refill interval is not positive the
// channel is never refilled after the initial burst.
//
// The refill goroutine runs for the lifetime of the process; there is no way
// to stop it through this API.
func NewRateLimit(request_per int, time_scale time.Duration) <-chan time.Time {
	if request_per < 1 {
		request_per = 1
	}

	rate_limit := make(chan time.Time, request_per)
	for range request_per {
		rate_limit <- time.Now()
	}

	tickrate := time_scale / time.Duration(request_per)
	if tickrate <= 0 {
		// time.Tick returns nil for a non-positive interval, so a refill
		// goroutine would only park forever; skip it.
		return rate_limit
	}

	go func() {
		for t := range time.Tick(tickrate) {
			// Blocks while the bucket is full; the ticker drops the ticks
			// that elapse in the meantime.
			rate_limit <- t
		}
	}()

	return rate_limit
}

// WorkUnit is one task travelling through the worker pool together with its
// outcome and retry bookkeeping.
//
// T is the type of the argument given to the function to run.
// S is the type of the value returned by the function to run.
type WorkUnit[T, S any] struct {
	argument T             // input handed to the work function
	result   S             // value returned by the last attempt
	err      error         // error returned by the last attempt, nil on success
	timeout  time.Duration // current retry delay before jitter; 0 until the first retry
	attempts uint8         // number of retries scheduled so far
}

// Work is a function that processes a value of type T and returns a result of
// type S or an error.
type Work[T, S any] func(value T) (S, error)

// Worker is a pool member that receives work units carrying an argument of
// type T, runs work on them, and sends them back carrying a result of type S.
type Worker[T, S any] struct {
	id         uint8                // id is the unique identifier of the worker.
	receptor   <-chan WorkUnit[T, S] // receptor is the channel from which the worker receives tasks.
	transmiter chan<- WorkUnit[T, S] // transmiter is the channel to which the worker sends results.
	wg         *sync.WaitGroup      // wg is the wait group to synchronize the completion of tasks (not used by the code in this file).
	work       Work[T, S]           // work is the function that processes tasks.
	rate_limit <-chan time.Time     // rate_limit yields one token per allowed call; nil disables rate limiting.
}

// WorkConfig configures a task runner. It contains a sync.WaitGroup and must
// therefore be shared by pointer, never copied.
type WorkConfig struct {
	tasks_processed sync.WaitGroup   // counts the work units that have not reached a final state yet
	max_workers     uint8            // number of workers to spawn
	max_retries     uint8            // retries allowed per unit before it is reported as failed
	base_retry_time time.Duration    // delay before the first retry; doubled on each later retry
	rate_limit      <-chan time.Time // token channel shared by all workers, see NewRateLimit
}

// Channels groups every channel used by one task runner.
type Channels[T, S any] struct {
	tasks_queue      chan T              // values submitted by the caller
	tasks_done       chan S              // successful results delivered to the caller
	tasks_failed     chan error          // errors of units that will not be retried
	units_dispatcher chan WorkUnit[T, S] // units waiting for a worker
	units_receiver   chan WorkUnit[T, S] // units returned by the workers
}

// maxRetryDelay caps the exponential backoff so that the delay plus its jitter
// (which is strictly smaller than the delay) cannot overflow time.Duration.
const maxRetryDelay = time.Duration(1 << 62)

// nextRetryDelay returns the backoff that follows current: base for the first
// retry, twice the current delay afterwards, clamped to [0, maxRetryDelay].
func nextRetryDelay(current, base time.Duration) time.Duration {
	next := base
	if current > 0 {
		if current >= maxRetryDelay/2 {
			return maxRetryDelay
		}
		next = current * 2
	}
	if next < 0 {
		return 0
	}
	if next > maxRetryDelay {
		return maxRetryDelay
	}
	return next
}

// sendUnlessCancelled delivers value on out, blocking until a receiver takes it
// or ctx is cancelled. After cancellation the value is still handed over when
// a receiver is already waiting and dropped otherwise.
func sendUnlessCancelled[V any](ctx context.Context, out chan<- V, value V) {
	select {
	case out <- value:
	case <-ctx.Done():
		select {
		case out <- value:
		default:
		}
	}
}

// spawn_worker runs the worker loop: it takes units from worker.receptor, waits
// for a rate-limit token when a limiter is configured, runs worker.work on the
// unit argument and sends the unit, with result and err filled in, to
// worker.transmiter. It returns when worker.receptor is closed.
func spawn_worker[T, S any](worker *Worker[T, S]) {
	// TODO: handle timeouts
	for workUnit := range worker.receptor {
		// A nil channel would block forever, so nil means "no rate limit".
		if worker.rate_limit != nil {
			<-worker.rate_limit
		}

		workUnit.result, workUnit.err = worker.work(workUnit.argument)
		worker.transmiter <- workUnit
	}
}

// handleFailedWorkUnit decides what happens to a unit whose last attempt
// failed.
//
// When the unit has used all config.max_retries, its error is sent on
// channels.tasks_failed, the unit is marked as processed and false is
// returned. Otherwise the attempt counter and backoff of workUnit are updated,
// a goroutine re-queues a copy of the unit on channels.units_dispatcher after
// the backoff plus a random jitter, and true is returned.
func handleFailedWorkUnit[T, S any](
	workUnit *WorkUnit[T, S],
	channels *Channels[T, S],
	config *WorkConfig,
) bool {
	if config.max_retries <= workUnit.attempts {
		channels.tasks_failed <- workUnit.err
		config.tasks_processed.Done()
		return false
	}

	workUnit.attempts++
	workUnit.err = nil
	workUnit.timeout = nextRetryDelay(workUnit.timeout, config.base_retry_time)

	// The goroutine works on its own copy so it never shares state with the
	// caller's variable.
	retry := *workUnit
	go func() {
		timeout := retry.timeout
		if timeout > 0 {
			// rand.Int63n panics for n <= 0, hence the guard.
			timeout += time.Duration(rand.Int63n(int64(timeout)))
		}
		fmt.Printf(
			"Unit with value %v failed for %v time, retrying in: %v\n",
			retry.argument,
			retry.attempts,
			timeout,
		)
		time.Sleep(timeout)
		channels.units_dispatcher <- retry
	}()

	return true
}

// listenForWorkResults decides what is returned to the user. Successful
// results go to channels.tasks_done; failed units are retried through
// handleFailedWorkUnit until their retries are exhausted, then their error
// goes to channels.tasks_failed.
//
// It returns when channels.units_receiver is closed. Cancelling ctx does not
// stop the loop, because every in-flight unit must still be marked as
// processed for stopProcessingWork to finish; after cancellation failed units
// are no longer retried and deliveries no longer wait for a receiver.
func listenForWorkResults[T, S any](
	ctx context.Context,
	channels *Channels[T, S],
	config *WorkConfig,
) {
	for workUnit := range channels.units_receiver {
		switch {
		case workUnit.err == nil:
			// Send message to user
			sendUnlessCancelled(ctx, channels.tasks_done, workUnit.result)
			config.tasks_processed.Done()
		case ctx.Err() != nil || config.max_retries <= workUnit.attempts:
			// Final failure: retries exhausted or the run was cancelled.
			sendUnlessCancelled(ctx, channels.tasks_failed, workUnit.err)
			config.tasks_processed.Done()
		default:
			handleFailedWorkUnit(&workUnit, channels, config)
		}
	}
}

// workUnitDispatcher receives values from channels.tasks_queue and turns them
// into work units for the workers. It stops when the queue is closed or ctx is
// cancelled, then runs stopProcessingWork. A value read from the queue that
// cannot be handed to the workers before cancellation is dropped.
func workUnitDispatcher[T, S any](
	ctx context.Context,
	finish context.CancelFunc,
	channels *Channels[T, S],
	config *WorkConfig,
) {
	defer stopProcessingWork(finish, channels, config)

	for {
		select {
		case value, ok := <-channels.tasks_queue:
			if !ok {
				return
			}

			workUnit := WorkUnit[T, S]{
				argument: value,
				timeout:  0,
				attempts: 0,
			}

			// Count the unit before a worker can see it; otherwise its Done
			// could run before this Add.
			config.tasks_processed.Add(1)
			select {
			case channels.units_dispatcher <- workUnit:
			case <-ctx.Done():
				config.tasks_processed.Done()
				fmt.Println("context done")
				return
			}
		case <-ctx.Done():
			fmt.Println("context done")
			return
		}
	}
}

// stopProcessingWork waits until every dispatched unit has reached a final
// state, then closes the work-unit channels (which ends the workers and the
// result listener) and the user-facing channels, and finally calls finish.
// Waiting first prevents closing a channel that still has senders.
func stopProcessingWork[T, S any](
	finish context.CancelFunc,
	channels *Channels[T, S],
	config *WorkConfig,
) {
	config.tasks_processed.Wait()

	close(channels.units_dispatcher)
	close(channels.units_receiver)
	close(channels.tasks_done)
	close(channels.tasks_failed)

	finish()
}

// asyncTaskRunner starts a pool of config.max_workers workers (at least one)
// that apply work to every value received from inbound.
//
// It returns three channels: the successful results, the errors of the units
// that were given up, and a channel that is closed once the run has finished
// or ctx has been cancelled. The first two are closed when all dispatched
// units are settled after inbound is closed or ctx is cancelled. The caller
// must keep receiving from both until they are closed or cancel ctx.
func asyncTaskRunner[T, S any](
	ctx context.Context,
	inbound chan T,
	config *WorkConfig,
	work Work[T, S],
) (<-chan S, <-chan error, <-chan struct{}) {
	workers := int(config.max_workers)
	if workers < 1 {
		workers = 1
	}
	// Computed as int: the product overflows uint8 above 85 workers.
	channel_size := workers * 3

	done, finish := context.WithCancel(ctx)

	channels := &Channels[T, S]{
		tasks_queue:      inbound,
		tasks_done:       make(chan S),
		tasks_failed:     make(chan error),
		units_dispatcher: make(chan WorkUnit[T, S], channel_size),
		units_receiver:   make(chan WorkUnit[T, S], channel_size),
	}

	// create pool of workers
	for i := range workers {
		worker := &Worker[T, S]{
			id:         uint8(i),
			receptor:   channels.units_dispatcher,
			transmiter: channels.units_receiver,
			rate_limit: config.rate_limit,
			work:       work,
		}

		go spawn_worker(worker)
	}

	go listenForWorkResults(done, channels, config)
	go workUnitDispatcher(done, finish, channels, config)

	return channels.tasks_done, channels.tasks_failed, done.Done()
}