Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ Available flags:
- Specifies Redis server's address
- Type: string
- Default value: none
- `-scraperfailurepause`
- Amount of time in seconds to wait after a failed task to idle (0 for immediate retry, negative for pause until end of minute)
- Type: integer
- Default value: `-1`

You can use them like this:
```bash
Expand Down
2 changes: 1 addition & 1 deletion handlers/getStatus.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
)

var initTime = time.Now()
var version = "1.17.0"
var version = "1.18.0"

func getStatus(w http.ResponseWriter, r *http.Request) {
json.NewEncoder(w).Encode(map[string]interface{}{
Expand Down
2 changes: 2 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ func main() {
flagProxyReloadWebhook := flag.String("proxyreloadwebhook", "", "Webhook address to request proxy reload")
flagRateLimit := flag.Uint64("ratelimit", 512, "Maximum number of requests per minute per IP")
flagRedis := flag.String("redis", "", "Redis connection string")
flagScraperFailurePause := flag.Int("scraperfailurepause", -1, "Amount of time in seconds to wait after a failed task to idle")
flagTaskRetries := flag.Uint("taskretries", 3, "Number of retries for a scraping task")
flagVerbose := flag.Bool("verbose", false, "Print out additional logs into stdout")
flag.Parse()
Expand Down Expand Up @@ -58,6 +59,7 @@ func main() {
viper.Set("proxyreloadwebhook", *flagProxyReloadWebhook)
viper.Set("ratelimit", int64(*flagRateLimit))
viper.Set("redis", *flagRedis)
viper.Set("scraperfailurepause", time.Duration(*flagScraperFailurePause)*time.Second)
viper.Set("taskretries", int(*flagTaskRetries))
viper.Set("verbose", *flagVerbose)

Expand Down
44 changes: 44 additions & 0 deletions scraper/handleTaskError.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package scraper

import (
"bdo-rest-api/logger"
"bdo-rest-api/utils"
"fmt"
"strconv"
"time"

"github.com/gocolly/colly/v2"
"github.com/spf13/viper"
)

func handleTaskError(r *colly.Request, imperva bool, err error) {
taskRetries, _ := strconv.Atoi(r.Ctx.Get("taskRetries"))

if imperva {
logger.Error(fmt.Sprintf("Hit Imperva while loading %v, retries: %v", r.URL, taskRetries))
} else {
logger.Error(fmt.Sprintf("Error occured while loading %v: %v, retries: %v", r.URL, err, taskRetries))
}

if proxyReloadWebhook := viper.GetString("proxyreloadwebhook"); proxyReloadWebhook != "" {
utils.SendDummyRequest(proxyReloadWebhook)
}

if scraperFailurePause := viper.GetDuration("scraperfailurepause"); scraperFailurePause >= 0 {
taskQueue.Pause(scraperFailurePause)
} else {
taskQueue.Pause(time.Duration(60-time.Now().Second()) * time.Second)
}

taskQueue.ConfirmTaskCompletion(r.Ctx.Get("taskClient"), r.Ctx.Get("taskHash"))

if taskRetries < viper.GetInt("taskretries") {
taskRegion := r.Ctx.Get("taskRegion")
taskType := r.Ctx.Get("taskType")
taskQueue.AddTask(r.Ctx.Get("taskClient"), r.Ctx.Get("taskHash"), utils.BuildRequest(r.URL.String(), map[string]string{
"taskRegion": taskRegion,
"taskRetries": strconv.Itoa(taskRetries + 1),
"taskType": taskType,
}))
}
}
22 changes: 2 additions & 20 deletions scraper/scraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,7 @@ func InitScraper() {
})

scraper.OnError(func(r *colly.Response, err error) {
logger.Error(fmt.Sprintf("Error occured while loading %v: %v", r.Request.URL, err))
taskQueue.ConfirmTaskCompletion(r.Ctx.Get("taskClient"), r.Ctx.Get("taskHash"))
handleTaskError(r.Request, false, err)
})

scraper.OnResponse(func(r *colly.Response) {
Expand All @@ -80,24 +79,7 @@ func InitScraper() {
})

if imperva {
taskRetries, _ := strconv.Atoi(body.Request.Ctx.Get("taskRetries"))
logger.Error(fmt.Sprintf("Hit Imperva while loading %v, retries: %v", body.Request.URL.String(), taskRetries))
if proxyReloadWebhook := viper.GetString("proxyreloadwebhook"); proxyReloadWebhook != "" {
utils.SendDummyRequest(proxyReloadWebhook)
taskQueue.Pause(time.Second * 5)
} else {
taskQueue.Pause(time.Duration(60-time.Now().Second()) * time.Second)
}
taskQueue.ConfirmTaskCompletion(taskClient, taskHash)

if taskRetries < viper.GetInt("taskretries") {
taskQueue.AddTask(taskClient, taskHash, utils.BuildRequest(body.Request.URL.String(), map[string]string{
"taskRegion": taskRegion,
"taskRetries": strconv.Itoa(taskRetries + 1),
"taskType": taskType,
}))
}

handleTaskError(body.Request, true, nil)
return
}

Expand Down