diff --git a/README.md b/README.md index e94b920..3f89c25 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,10 @@ Available flags: - Specifies Redis server's address - Type: string - Default value: none +- `-scraperfailurepause` + - Amount of time in seconds to wait after a failed task to idle (0 for immediate retry, negative for pause until end of minute) + - Type: integer + - Default value: `-1` You can use them like this: ```bash diff --git a/handlers/getStatus.go b/handlers/getStatus.go index 4dcb5f9..a57d615 100644 --- a/handlers/getStatus.go +++ b/handlers/getStatus.go @@ -12,7 +12,7 @@ import ( ) var initTime = time.Now() -var version = "1.17.0" +var version = "1.18.0" func getStatus(w http.ResponseWriter, r *http.Request) { json.NewEncoder(w).Encode(map[string]interface{}{ diff --git a/main.go b/main.go index 3c771b9..9daf25c 100644 --- a/main.go +++ b/main.go @@ -26,6 +26,7 @@ func main() { flagProxyReloadWebhook := flag.String("proxyreloadwebhook", "", "Webhook address to request proxy reload") flagRateLimit := flag.Uint64("ratelimit", 512, "Maximum number of requests per minute per IP") flagRedis := flag.String("redis", "", "Redis connection string") + flagScraperFailurePause := flag.Int("scraperfailurepause", -1, "Amount of time in seconds to wait after a failed task to idle") flagTaskRetries := flag.Uint("taskretries", 3, "Number of retries for a scraping task") flagVerbose := flag.Bool("verbose", false, "Print out additional logs into stdout") flag.Parse() @@ -58,6 +59,7 @@ func main() { viper.Set("proxyreloadwebhook", *flagProxyReloadWebhook) viper.Set("ratelimit", int64(*flagRateLimit)) viper.Set("redis", *flagRedis) + viper.Set("scraperfailurepause", time.Duration(*flagScraperFailurePause)*time.Second) viper.Set("taskretries", int(*flagTaskRetries)) viper.Set("verbose", *flagVerbose) diff --git a/scraper/handleTaskError.go b/scraper/handleTaskError.go new file mode 100644 index 0000000..ec539ef --- /dev/null +++ b/scraper/handleTaskError.go @@ -0,0 +1,44 @@ +package scraper + +import ( + "bdo-rest-api/logger" + "bdo-rest-api/utils" + "fmt" + "strconv" + "time" + + "github.com/gocolly/colly/v2" + "github.com/spf13/viper" +) + +func handleTaskError(r *colly.Request, imperva bool, err error) { + taskRetries, _ := strconv.Atoi(r.Ctx.Get("taskRetries")) + + if imperva { + logger.Error(fmt.Sprintf("Hit Imperva while loading %v, retries: %v", r.URL, taskRetries)) + } else { + logger.Error(fmt.Sprintf("Error occured while loading %v: %v, retries: %v", r.URL, err, taskRetries)) + } + + if proxyReloadWebhook := viper.GetString("proxyreloadwebhook"); proxyReloadWebhook != "" { + utils.SendDummyRequest(proxyReloadWebhook) + } + + if scraperFailurePause := viper.GetDuration("scraperfailurepause"); scraperFailurePause >= 0 { + taskQueue.Pause(scraperFailurePause) + } else { + taskQueue.Pause(time.Duration(60-time.Now().Second()) * time.Second) + } + + taskQueue.ConfirmTaskCompletion(r.Ctx.Get("taskClient"), r.Ctx.Get("taskHash")) + + if taskRetries < viper.GetInt("taskretries") { + taskRegion := r.Ctx.Get("taskRegion") + taskType := r.Ctx.Get("taskType") + taskQueue.AddTask(r.Ctx.Get("taskClient"), r.Ctx.Get("taskHash"), utils.BuildRequest(r.URL.String(), map[string]string{ + "taskRegion": taskRegion, + "taskRetries": strconv.Itoa(taskRetries + 1), + "taskType": taskType, + })) + } +} diff --git a/scraper/scraper.go b/scraper/scraper.go index fd8cadb..c118b53 100644 --- a/scraper/scraper.go +++ b/scraper/scraper.go @@ -58,8 +58,7 @@ func InitScraper() { }) scraper.OnError(func(r *colly.Response, err error) { - logger.Error(fmt.Sprintf("Error occured while loading %v: %v", r.Request.URL, err)) - taskQueue.ConfirmTaskCompletion(r.Ctx.Get("taskClient"), r.Ctx.Get("taskHash")) + handleTaskError(r.Request, false, err) }) scraper.OnResponse(func(r *colly.Response) { @@ -80,24 +79,7 @@ func InitScraper() { }) if imperva { - taskRetries, _ := strconv.Atoi(body.Request.Ctx.Get("taskRetries")) - logger.Error(fmt.Sprintf("Hit Imperva while loading %v, retries: %v", body.Request.URL.String(), taskRetries)) - if proxyReloadWebhook := viper.GetString("proxyreloadwebhook"); proxyReloadWebhook != "" { - utils.SendDummyRequest(proxyReloadWebhook) - taskQueue.Pause(time.Second * 5) - } else { - taskQueue.Pause(time.Duration(60-time.Now().Second()) * time.Second) - } - taskQueue.ConfirmTaskCompletion(taskClient, taskHash) - - if taskRetries < viper.GetInt("taskretries") { - taskQueue.AddTask(taskClient, taskHash, utils.BuildRequest(body.Request.URL.String(), map[string]string{ - "taskRegion": taskRegion, - "taskRetries": strconv.Itoa(taskRetries + 1), - "taskType": taskType, - })) - } - + handleTaskError(body.Request, true, nil) return }