package synchronizator

import (
	"context"
	"fmt"
	"math/rand"
	"sync"
	"time"
)

// Fetcher is the concurrent manager.
//
// Upon invocation it should create a worker pool of one to get the first set
// of results, then, based on the Pagination Total and Limit, distribute the
// remaining workload.
//
// It also needs to handle errors, rate limits, retry strategies, and graceful
// rejections.
//
// It should return the pages that were not fetched, for a later retry.
//
// Pagination should include max-concurrent-connections and rate-limit
// configuration to prevent errors from external sources.
//
// TODO: maybe rename this to pagination, or embed it in another struct.
type Fetcher = func(pagination Pagination) ([]*Collection, Pagination, error)

// Pagination describes the paging state reported by an external source.
type Pagination struct {
	Total   int
	HasMore bool
	Limit   int
	Offset  int
}

// StartPagination is the default state used to request the first page.
var StartPagination = Pagination{
	Total:   0,
	HasMore: false,
	Limit:   10,
	Offset:  0,
}
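
// exampleFetcher is a minimal sketch of a Fetcher (illustrative only, not
// part of the original file; it assumes the package's Collection type and
// stands in for a real external call): it fetches one page and advances the
// offset for the next request.
func exampleFetcher(pagination Pagination) ([]*Collection, Pagination, error) {
	// ... call the external source with pagination.Limit and
	// pagination.Offset here; an empty page stands in for the response ...
	items := make([]*Collection, 0, pagination.Limit)

	next := Pagination{
		Total:  pagination.Total,
		Limit:  pagination.Limit,
		Offset: pagination.Offset + pagination.Limit,
	}
	next.HasMore = next.Offset < next.Total
	return items, next, nil
}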

// NewRateLimit returns a channel that grants request_per slots every
// time_scale. The buffer is pre-filled so the first request_per receives do
// not block, then refilled at a steady tick rate.
//
// Note: the refilling goroutine never exits, since nothing stops it.
func NewRateLimit(request_per int, time_scale time.Duration) <-chan time.Time {
	rate_limit := make(chan time.Time, request_per)
	tickrate := time_scale / time.Duration(request_per)

	// Pre-fill the buffer so an initial burst of request_per is allowed.
	for range request_per {
		rate_limit <- time.Now()
	}

	go func() {
		for t := range time.Tick(tickrate) {
			rate_limit <- t
		}
	}()

	return rate_limit
}
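
// exampleRateLimitedLoop is a usage sketch (illustrative, not part of the
// original file): a limiter granting at most 5 slots per second, where each
// receive blocks until a slot is free.
func exampleRateLimitedLoop(urls []string) {
	limiter := NewRateLimit(5, time.Second)
	for _, url := range urls {
		<-limiter // block until the limiter grants a slot
		fmt.Println("would fetch", url)
	}
}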

// WorkUnit carries one task through the pipeline.
// T represents the argument of the function to run;
// S represents its return value.
type WorkUnit[T, S any] struct {
	argument T
	result   S
	err      error
	timeout  time.Duration
	attempts uint8
}

// Work represents a function that processes a value of type T and returns a
// result of type S or an error.
type Work[T, S any] func(value T) (S, error)
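
// flakyWork is an illustrative Work[int, int] implementation (not part of
// the original file): it fails randomly, which exercises the retry path
// further below.
func flakyWork(n int) (int, error) {
	if rand.Intn(4) == 0 { // ~25% of calls fail transiently
		return 0, fmt.Errorf("transient error processing %d", n)
	}
	return n * n, nil
}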

// Worker represents a worker that processes tasks of type T and sends results
// of type S.
type Worker[T, S any] struct {
	id         uint8                 // id is the unique identifier of the worker.
	receptor   <-chan WorkUnit[T, S] // receptor is the channel from which the worker receives tasks.
	transmiter chan<- WorkUnit[T, S] // transmiter is the channel to which the worker sends results.
	wg         *sync.WaitGroup       // wg is the wait group to synchronize the completion of tasks.
	work       Work[T, S]            // work is the function that processes tasks.
	rate_limit <-chan time.Time      // rate_limit gates how often the worker may run work.
}

// WorkConfig holds the knobs shared by the dispatcher, workers, and the
// result listener.
type WorkConfig struct {
	tasks_processed sync.WaitGroup   // counts units still in flight.
	max_workers     uint8            // size of the worker pool.
	max_retries     uint8            // attempts allowed before a unit is reported as failed.
	base_retry_time time.Duration    // initial backoff, doubled on every retry.
	rate_limit      <-chan time.Time // shared limiter consumed by every worker.
}

// Channels groups the channels that connect the pipeline stages.
type Channels[T, S any] struct {
	tasks_queue      chan T              // inbound values supplied by the caller.
	tasks_done       chan S              // successful results, delivered to the caller.
	tasks_failed     chan error          // errors that exhausted their retries.
	units_dispatcher chan WorkUnit[T, S] // units waiting for a worker.
	units_receiver   chan WorkUnit[T, S] // units a worker has finished with.
}

func spawn_worker[T, S any](worker *Worker[T, S]) {
	// TODO: handle timeouts
	for workUnit := range worker.receptor {
		// Wait for the rate limit to grant a slot.
		<-worker.rate_limit

		value, err := worker.work(workUnit.argument)
		workUnit.result = value
		workUnit.err = err

		worker.transmiter <- workUnit
	}
}

// handleFailedWorkUnit either gives up on a unit (when its retries are
// exhausted) or schedules another attempt with exponential backoff plus
// jitter. It reports whether a retry was scheduled.
func handleFailedWorkUnit[T, S any](
	workUnit *WorkUnit[T, S],
	channels *Channels[T, S],
	config *WorkConfig,
) bool {
	if config.max_retries <= workUnit.attempts {
		channels.tasks_failed <- workUnit.err
		config.tasks_processed.Done()
		return false
	}

	workUnit.attempts++
	workUnit.err = nil

	// Exponential backoff: start at the base retry time, then double it on
	// each subsequent failure.
	if workUnit.timeout == 0 {
		workUnit.timeout = config.base_retry_time
	} else {
		workUnit.timeout *= 2
	}

	go func() {
		// Add jitter in [0, timeout) so retries do not stampede the source.
		jitter := time.Duration(rand.Int63n(int64(workUnit.timeout)))
		timeout := workUnit.timeout + jitter
		fmt.Printf(
			"Unit with value %v failed %v time(s), retrying in: %v\n",
			workUnit.argument,
			workUnit.attempts,
			timeout,
		)
		time.Sleep(timeout)
		channels.units_dispatcher <- *workUnit
	}()

	return true
}
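
// Worked example of the backoff schedule above, assuming
// base_retry_time = 100ms: the first retry waits 100ms plus jitter in
// [0, 100ms), the second 200ms plus jitter in [0, 200ms), the third 400ms
// plus jitter in [0, 400ms), doubling until max_retries is exhausted.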

// listenForWorkResults is in charge of what we return to the user.
// It exits when units_receiver is closed, which happens once the workers are
// done.
func listenForWorkResults[T, S any](
	ctx context.Context,
	channels *Channels[T, S],
	config *WorkConfig,
) {
	for {
		select {
		case workUnit, ok := <-channels.units_receiver:
			if !ok {
				return
			}

			if workUnit.err != nil {
				handleFailedWorkUnit(&workUnit, channels, config)
				continue
			}

			// Send the result to the user.
			channels.tasks_done <- workUnit.result
			config.tasks_processed.Done()
		case <-ctx.Done():
			return
		}
	}
}

// workUnitDispatcher is in charge of receiving values and turning them into
// work units. It stops when the queue channel is closed and drained.
func workUnitDispatcher[T, S any](
	ctx context.Context,
	finish context.CancelFunc,
	channels *Channels[T, S],
	config *WorkConfig,
) {
	defer stopProcessingWork(finish, channels, config)

	for {
		select {
		case value, ok := <-channels.tasks_queue:
			if !ok {
				return
			}

			// Count the unit before handing it off, so the listener can
			// never call Done before the matching Add.
			config.tasks_processed.Add(1)
			workUnit := WorkUnit[T, S]{
				argument: value,
				timeout:  0,
				attempts: 0,
			}
			channels.units_dispatcher <- workUnit

		case <-ctx.Done():
			fmt.Println("context done")
			return
		}
	}
}

// stopProcessingWork waits for all in-flight units to finish, then closes the
// unit channels the workers send on. Waiting first prevents closing a channel
// while a worker could still send to it.
func stopProcessingWork[T, S any](
	finish context.CancelFunc,
	channels *Channels[T, S],
	config *WorkConfig,
) {
	config.tasks_processed.Wait()

	close(channels.units_receiver)
	close(channels.units_dispatcher)
	close(channels.tasks_done)
	close(channels.tasks_failed)

	finish()
}

// asyncTaskRunner wires the pipeline together: it spawns the worker pool and
// the dispatcher/listener goroutines, and returns the channels the caller
// consumes (results, failures, and a done signal).
func asyncTaskRunner[T, S any](
	ctx context.Context,
	inbound chan T,
	config *WorkConfig,
	work Work[T, S],
) (<-chan S, <-chan error, <-chan struct{}) {
	channel_size := config.max_workers * 3

	done, finish := context.WithCancel(ctx)

	channels := &Channels[T, S]{
		tasks_queue:      inbound,
		tasks_done:       make(chan S),
		tasks_failed:     make(chan error),
		units_dispatcher: make(chan WorkUnit[T, S], channel_size),
		units_receiver:   make(chan WorkUnit[T, S], channel_size),
	}

	// Create the pool of workers.
	for i := range config.max_workers {
		worker := &Worker[T, S]{
			id:         uint8(i),
			receptor:   channels.units_dispatcher,
			transmiter: channels.units_receiver,
			rate_limit: config.rate_limit,
			work:       work,
		}

		go spawn_worker(worker)
	}

	go listenForWorkResults(done, channels, config)
	go workUnitDispatcher(done, finish, channels, config)
	return channels.tasks_done, channels.tasks_failed, done.Done()
}
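
// exampleRun is a minimal end-to-end sketch (illustrative, not part of the
// original file): it feeds integers through the flakyWork sketch above and
// drains both result channels until the runner closes them.
func exampleRun() {
	config := &WorkConfig{
		max_workers:     4,
		max_retries:     3,
		base_retry_time: 100 * time.Millisecond,
		rate_limit:      NewRateLimit(10, time.Second),
	}

	inbound := make(chan int)
	go func() {
		for i := range 20 {
			inbound <- i
		}
		close(inbound) // a closed, drained queue shuts the pipeline down
	}()

	done, failed, _ := asyncTaskRunner(context.Background(), inbound, config, flakyWork)

	// Drain until both channels are closed by stopProcessingWork; a nil
	// channel blocks forever, removing it from the select.
	for done != nil || failed != nil {
		select {
		case result, ok := <-done:
			if !ok {
				done = nil
				continue
			}
			fmt.Println("result:", result)
		case err, ok := <-failed:
			if !ok {
				failed = nil
				continue
			}
			fmt.Println("failed:", err)
		}
	}
}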