synchronizator-go/pkg/fetcher.go

package synchronizator

import (
	"context"
	"fmt"
	"math/rand"
	"sync"
	"time"
)

// Fetcher is the concurrent manager.
// Upon invocation, it should create a worker pool of 1 to get the first set of results,
// then, based on the Pagination Total and Limit, it should distribute the workload.
//
// It also needs to handle errors, rate limits, retry strategies, and graceful rejections.
//
// It should return the pages not fetched for later retry.
//
// Pagination should include a max-concurrent-connections and rate-limit
// configuration to prevent errors from external sources.
//
// Maybe change the name to pagination or embed in another struct.

// Fetcher is an alias for a function that receives a Pagination and returns a
// slice of *Collection, a Pagination, and an error.
//
// NOTE(review): presumably the returned Pagination describes the state after
// the fetch (updated Total/HasMore) — confirm against the implementations.
type Fetcher = func(pagination Pagination) ([]*Collection, Pagination, error)

// Pagination holds the paging values that a Fetcher receives and returns.
//
// NOTE(review): the field meanings below are inferred from their names only —
// confirm against the code that fills them in.
type Pagination struct {
	Total   int  // presumably the total number of items available
	HasMore bool // presumably whether more pages remain after this one
	Limit   int  // presumably the maximum number of items per page
	Offset  int  // presumably the position of the first item of the page
}

// StartPagination is the initial Pagination value: Limit is 10 and every other
// field (Total, HasMore, Offset) is left at its zero value.
var StartPagination = Pagination{Limit: 10}

// NewRateLimit returns a channel of rate-limit tokens. A caller receives one
// value from the channel before each request it wants to make.
//
// The channel starts with request_per tokens, which allows an initial burst of
// that size. A background goroutine then adds one token every
// time_scale/request_per. Tokens produced while the channel is full are
// dropped, so the burst never grows beyond request_per.
//
// Edge cases:
//   - request_per below 1 is treated as 1 (it would otherwise divide by zero).
//   - when time_scale/request_per is not a positive duration there is nothing
//     to pace against, so tokens are made available as fast as they are
//     consumed.
//
// The refill goroutine runs for the lifetime of the program; this function
// offers no way to stop it.
func NewRateLimit(request_per int, time_scale time.Duration) <-chan time.Time {
	if request_per < 1 {
		request_per = 1
	}

	rate_limit := make(chan time.Time, request_per)
	tickrate := time_scale / time.Duration(request_per)

	// Pre-fill the bucket so the first request_per callers do not wait.
	start := time.Now()
	for i := 0; i < request_per; i++ {
		rate_limit <- start
	}

	if tickrate <= 0 {
		// time.Tick returns nil for a non-positive interval, which would
		// leave the bucket empty forever. Refill with blocking sends instead:
		// the goroutine parks while the bucket is full and costs nothing.
		go func() {
			for {
				rate_limit <- time.Now()
			}
		}()
		return rate_limit
	}

	go func() {
		for t := range time.Tick(tickrate) {
			// Non-blocking send: a tick that finds the bucket full is
			// discarded rather than held back as an extra token.
			select {
			case rate_limit <- t:
			default:
			}
		}
	}()

	return rate_limit
}

// WorkUnit carries one task through the worker pool together with its outcome
// and its retry bookkeeping.
//
// T represents the argument of the function to run.
// S represents the return value of the function to run.
type WorkUnit[T, S any] struct {
	argument T             // value passed to the work function
	result   S             // value returned by the work function
	err      error         // error returned by the work function; cleared before a retry
	timeout  time.Duration // retry back-off; zero until the first retry is scheduled
	attempts uint8         // number of retries scheduled so far
}

// Work represents a function that processes a value of type T and returns a
// result of type S or an error.
type Work[T, S any] func(value T) (S, error)

// Worker represents a worker that receives work units whose argument has type
// T, runs work on them, and sends the units back carrying a result of type S.
type Worker[T, S any] struct {
	id         uint8                 // id is the identifier assigned to the worker.
	receptor   <-chan WorkUnit[T, S] // receptor is the channel from which the worker receives work units.
	transmiter chan<- WorkUnit[T, S] // transmiter is the channel to which the worker sends processed work units.
	wg         *sync.WaitGroup       // wg is a wait group pointer. NOTE(review): no code visible here sets or reads it — confirm it is still needed.
	work       Work[T, S]            // work is the function applied to each work unit's argument.
	rate_limit <-chan time.Time      // rate_limit is received from once before every call to work.
}

// WorkConfig holds the settings and the shared task counter of one task
// runner. It embeds a sync.WaitGroup by value, so it is passed around by
// pointer and must not be copied after first use.
type WorkConfig struct {
	tasks_processed sync.WaitGroup   // tasks dispatched but not yet delivered or reported as failed
	max_workers     uint8            // number of workers started by asyncTaskRunner
	max_retries     uint8            // retries allowed per task before its error is reported
	base_retry_time time.Duration    // back-off of the first retry; doubled on each later retry
	rate_limit      <-chan time.Time // handed to every worker as its rate limiter
}

// Channels groups the channels that connect the stages of the task runner.
type Channels[T, S any] struct {
	tasks_queue      chan T              // incoming task arguments, read by workUnitDispatcher
	tasks_done       chan S              // results of successful tasks, written by listenForWorkResults
	tasks_failed     chan error          // errors of tasks that ran out of retries
	units_dispatcher chan WorkUnit[T, S] // work units waiting to be picked up by a worker
	units_receiver   chan WorkUnit[T, S] // work units sent back by workers after running
}

// spawn_worker runs the processing loop of one worker and is meant to be
// started in its own goroutine.
//
// For every work unit read from worker.receptor it waits for a rate-limit
// token, calls worker.work with the unit's argument, stores the returned value
// and error on the unit, and sends the unit to worker.transmiter. The loop ends
// when worker.receptor is closed.
//
// A nil worker.rate_limit means "no rate limit". Receiving from a nil channel
// blocks forever, which would silently stall a worker built from a config that
// never set a limiter.
func spawn_worker[T, S any](worker *Worker[T, S]) {
	// TODO: handle timeouts
	for workUnit := range worker.receptor {
		// Wait for rate-limit
		if worker.rate_limit != nil {
			<-worker.rate_limit
		}

		workUnit.result, workUnit.err = worker.work(workUnit.argument)

		worker.transmiter <- workUnit
	}
}

// handleFailedWorkUnit decides what happens to a work unit whose work function
// returned an error.
//
// If the unit has already used config.max_retries attempts, its error is sent
// to channels.tasks_failed, the task is marked as processed on
// config.tasks_processed, and false is returned.
//
// Otherwise the unit's attempt counter is incremented, its error is cleared
// and its back-off is updated: config.base_retry_time on the first retry,
// doubled on each later retry. A goroutine then sleeps for the back-off plus a
// random jitter smaller than the back-off and puts the unit back on
// channels.units_dispatcher. In that case true is returned immediately; the
// task stays counted as pending.
//
// workUnit is updated in place. The retry goroutine works on its own copy, so
// the caller may reuse the pointed-to value after the call returns.
func handleFailedWorkUnit[T, S any](
	workUnit *WorkUnit[T, S],
	channels *Channels[T, S],
	config *WorkConfig,
) bool {
	if config.max_retries <= workUnit.attempts {
		channels.tasks_failed <- workUnit.err
		config.tasks_processed.Done()
		return false
	}

	workUnit.attempts++
	workUnit.err = nil

	switch {
	case workUnit.timeout == 0:
		workUnit.timeout = config.base_retry_time
	case workUnit.timeout*2 > workUnit.timeout:
		// Only double while the product does not overflow; once it would,
		// the back-off stays at its current (already huge) value.
		workUnit.timeout *= 2
	}

	// Snapshot the unit so the goroutine does not read through the caller's
	// pointer after this function has returned.
	retry := *workUnit

	timeout := retry.timeout
	if timeout > 0 {
		// rand.Int63n panics for n <= 0, so jitter is only added to a
		// positive back-off (a zero base_retry_time retries immediately).
		jitter := time.Duration(rand.Int63n(int64(timeout)))
		if timeout+jitter > timeout {
			timeout += jitter
		}
	}

	go func() {
		fmt.Printf(
			"Unit with value %v failed for %v time, retrying in: %v\n",
			retry.argument,
			retry.attempts,
			timeout,
		)
		time.Sleep(timeout)
		channels.units_dispatcher <- retry
	}()

	return true
}

// listenForWorkResults is in charge of what is returned to the user.
//
// It reads processed work units from channels.units_receiver. A unit carrying
// an error is handed to handleFailedWorkUnit, which either schedules a retry or
// reports the failure. A successful unit has its result sent on
// channels.tasks_done and is then marked as processed on
// config.tasks_processed.
//
// The loop exits when channels.units_receiver is closed (done once all tasks
// have been processed) or when ctx is cancelled. Cancellation is also honoured
// while waiting for the consumer to read a result, so the listener cannot be
// left blocked on channels.tasks_done after the consumer has gone away. The
// undelivered result is dropped, but the task is still marked as processed so
// the shared counter stays balanced.
func listenForWorkResults[T, S any](
	ctx context.Context,
	channels *Channels[T, S],
	config *WorkConfig,
) {
	for {
		select {
		case workUnit, ok := <-channels.units_receiver:
			if !ok {
				return
			}

			if workUnit.err != nil {
				handleFailedWorkUnit(&workUnit, channels, config)
				continue
			}

			// Send message to user
			select {
			case channels.tasks_done <- workUnit.result:
				config.tasks_processed.Done()
			case <-ctx.Done():
				config.tasks_processed.Done()
				return
			}
		case <-ctx.Done():
			return
		}
	}
}

// workUnitDispatcher is in charge of receiving values and turning them into
// work units.
//
// Every value read from channels.tasks_queue is wrapped in a fresh WorkUnit
// (no attempts, no back-off) and sent to channels.units_dispatcher. The loop
// stops when channels.tasks_queue is closed or when ctx is cancelled; on the
// way out stopProcessingWork is called with finish.
//
// The task is registered on config.tasks_processed BEFORE the unit is sent.
// Registering it afterwards lets a fast worker finish the unit and reach Done
// first, which drives the WaitGroup counter negative and panics. The send also
// watches ctx, so a full dispatch channel cannot keep the dispatcher blocked
// after cancellation; the registration is undone for a unit that was never
// handed over.
func workUnitDispatcher[T, S any](
	ctx context.Context,
	finish context.CancelFunc,
	channels *Channels[T, S],
	config *WorkConfig,
) {
	defer stopProcessingWork(finish, channels, config)

	for {
		select {
		case value, ok := <-channels.tasks_queue:
			if !ok {
				return
			}

			workUnit := WorkUnit[T, S]{
				argument: value,
				timeout:  0,
				attempts: 0,
			}

			config.tasks_processed.Add(1)
			select {
			case channels.units_dispatcher <- workUnit:
			case <-ctx.Done():
				config.tasks_processed.Done()
				fmt.Println("context done")
				return
			}

		case <-ctx.Done():
			fmt.Println("context done")
			return
		}
	}
}

// stopProcessingWork shuts the pipeline down once every dispatched task has
// been accounted for.
//
// It blocks on config.tasks_processed until the counter reaches zero, then
// closes the two work-unit channels and the two user-facing channels, and
// finally calls finish. Waiting first prevents the channels from being closed
// while tasks are still counted as pending.
func stopProcessingWork[T, S any](
	finish context.CancelFunc,
	channels *Channels[T, S],
	config *WorkConfig,
) {
	// Blocks until every Add on tasks_processed has been matched by a Done.
	config.tasks_processed.Wait()

	// Closing units_dispatcher ends the workers' range loops; closing
	// units_receiver makes listenForWorkResults return.
	close(channels.units_receiver)
	close(channels.units_dispatcher)
	// Closing the user-facing channels signals the consumer that no more
	// results or errors will arrive.
	close(channels.tasks_done)
	close(channels.tasks_failed)

	finish()
}

// asyncTaskRunner wires up the whole pipeline for one stream of tasks and
// returns immediately.
//
// It derives a cancellable context from ctx, creates the internal channels,
// starts the pool of workers (each running work under config.rate_limit), and
// starts listenForWorkResults and workUnitDispatcher in their own goroutines.
//
// Parameters:
//   - ctx: parent context; cancelling it cancels the derived context.
//   - inbound: queue of task arguments; it is used as the tasks_queue.
//   - config: worker count, retry settings, rate limiter and the shared task
//     counter. A max_workers of 0 is treated as 1, because a pool without
//     workers would accept tasks and never process them.
//   - work: function applied to every task argument.
//
// Returns, in order: the channel of successful results, the channel of errors
// for tasks that ran out of retries, and the Done channel of the derived
// context.
func asyncTaskRunner[T, S any](
	ctx context.Context,
	inbound chan T,
	config *WorkConfig,
	work Work[T, S],
) (<-chan S, <-chan error, <-chan struct{}) {
	// Widen before multiplying: max_workers is a uint8, so max_workers*3
	// wraps around for more than 85 workers and shrinks the buffers.
	workers := int(config.max_workers)
	if workers == 0 {
		workers = 1
	}
	channel_size := workers * 3

	done, finish := context.WithCancel(ctx)

	channels := &Channels[T, S]{
		tasks_queue:      inbound,
		tasks_done:       make(chan S),
		tasks_failed:     make(chan error),
		units_dispatcher: make(chan WorkUnit[T, S], channel_size),
		units_receiver:   make(chan WorkUnit[T, S], channel_size),
	}

	// create pool of workers
	for i := 0; i < workers; i++ {
		worker := &Worker[T, S]{
			id:         uint8(i), // safe: workers never exceeds 255
			receptor:   channels.units_dispatcher,
			transmiter: channels.units_receiver,
			rate_limit: config.rate_limit,
			work:       work,
		}

		go spawn_worker(worker)
	}

	go listenForWorkResults(done, channels, config)
	go workUnitDispatcher(done, finish, channels, config)
	return channels.tasks_done, channels.tasks_failed, done.Done()
}