From d58149f741b01d70e71074f947a5099e78473576 Mon Sep 17 00:00:00 2001 From: man90 Date: Mon, 25 May 2026 19:01:23 +0200 Subject: [PATCH 1/3] Fix potential task requeueing failure --- scraper/handleTaskError.go | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/scraper/handleTaskError.go b/scraper/handleTaskError.go index f35278e..79e1ab4 100644 --- a/scraper/handleTaskError.go +++ b/scraper/handleTaskError.go @@ -19,6 +19,24 @@ func handleTaskError(r *colly.Request, blocked bool, err error) { taskClient := r.Ctx.Get(metadataTaskClient) taskHash := r.Ctx.Get(metadataTaskHash) + taskQueue.ConfirmTaskCompletion(taskClient, taskHash) + if taskRetries < viper.GetInt("taskretries") { + taskQueue.AddTask( + taskClient, + taskHash, + r.URL.String(), + viper.GetBool("taskretryfront"), + map[string]string{ + metadataTaskAddedAt: r.Ctx.Get(metadataTaskAddedAt), + metadataTaskClient: taskClient, + metadataTaskHash: taskHash, + metadataTaskRegion: r.Ctx.Get(metadataTaskRegion), + metadataTaskRetries: strconv.Itoa(taskRetries + 1), + metadataTaskType: r.Ctx.Get(metadataTaskType), + }, + ) + } + if blocked { logger.Error(fmt.Sprintf("Hit Imperva while loading %v, retries: %v", r.URL, taskRetries)) } else if strings.Contains(err.Error(), "http2: Transport received GOAWAY from server ErrCode:INTERNAL_ERROR") { @@ -40,23 +58,4 @@ func handleTaskError(r *colly.Request, blocked bool, err error) { } else { taskQueue.Pause(time.Duration(60-time.Now().Second()) * time.Second) } - - taskQueue.ConfirmTaskCompletion(taskClient, taskHash) - - if taskRetries < viper.GetInt("taskretries") { - taskQueue.AddTask( - taskClient, - taskHash, - r.URL.String(), - viper.GetBool("taskretryfront"), - map[string]string{ - metadataTaskAddedAt: r.Ctx.Get(metadataTaskAddedAt), - metadataTaskClient: taskClient, - metadataTaskHash: taskHash, - metadataTaskRegion: r.Ctx.Get(metadataTaskRegion), - metadataTaskRetries: strconv.Itoa(taskRetries + 1), - 
metadataTaskType: r.Ctx.Get(metadataTaskType), - }, - ) - } } From 1e6040780b303064b7913ae888f5b0bc352b2bba Mon Sep 17 00:00:00 2001 From: man90 Date: Mon, 25 May 2026 19:10:20 +0200 Subject: [PATCH 2/3] Move confirming task completion to a script --- scraper/taskQueue.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/scraper/taskQueue.go b/scraper/taskQueue.go index 0623384..9e2d431 100644 --- a/scraper/taskQueue.go +++ b/scraper/taskQueue.go @@ -28,6 +28,15 @@ var enqueueScript = redis.NewScript(` return 1 `) +var confirmCompletionScript = redis.NewScript(` + local clientsKey, hashesKey = KEYS[1], KEYS[2] + local taskClient, hash = ARGV[1], ARGV[2] + + redis.call("HINCRBY", clientsKey, taskClient, -1) + redis.call("SREM", hashesKey, hash) + return 1 +`) + type Task struct { Hash string Metadata map[string]string @@ -198,8 +207,5 @@ func (q *TaskQueue) CountQueuedTasksForClient(taskClient string) (count int) { } func (q *TaskQueue) ConfirmTaskCompletion(taskClient string, hash string) { - pipe := q.rdb.Pipeline() - pipe.HIncrBy(q.ctx, q.clientsKey, taskClient, -1) - pipe.SRem(q.ctx, q.hashesKey, hash) - _, _ = pipe.Exec(q.ctx) + _, _ = confirmCompletionScript.Run(q.ctx, q.rdb, []string{q.clientsKey, q.hashesKey}, taskClient, hash).Result() } From 76befb2146d7e96066bc0ee6b5749b373cff5b39 Mon Sep 17 00:00:00 2001 From: man90 Date: Mon, 25 May 2026 19:12:19 +0200 Subject: [PATCH 3/3] Bump version number --- handlers/getStatus.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/handlers/getStatus.go b/handlers/getStatus.go index d2bcfc6..fd5280c 100644 --- a/handlers/getStatus.go +++ b/handlers/getStatus.go @@ -12,7 +12,7 @@ import ( ) var initTime = time.Now() -var version = "1.19.0" +var version = "1.19.1" func getStatus(w http.ResponseWriter, r *http.Request) { json.NewEncoder(w).Encode(map[string]interface{}{