point-tools-crawler/point_post_crawler.go

118 lines
2.9 KiB
Go
Raw Normal View History

2016-03-07 20:04:50 +00:00
package main
import (
"bitbucket.org/skobkin/point-api-go"
"bitbucket.org/skobkin/point-tools-go"
"gopkg.in/alecthomas/kingpin.v2"
2016-03-21 20:54:09 +00:00
"fmt"
2016-03-07 20:04:50 +00:00
"log"
"strconv"
2016-03-21 20:54:09 +00:00
"time"
2016-03-07 20:04:50 +00:00
)
// Command-line flags. They are populated by kingpin.Parse() at the start of main.
var (
// Base URL of the Point.im API the posts are read from.
pointApiUrl = kingpin.Flag("point-api", "Point.im API URL").Default("https://point.im/api/").String()
// Base URL of the Point Tools crawler API the fetched pages are sent to.
pointToolsApiUrl = kingpin.Flag("point-tools-api", "Point Tools API URL").Default("https://point.skobk.in/api/crawler/").String()
// Point.im credentials used for the login call (both required).
pointLogin = kingpin.Flag("login", "Point.im login").Short('l').Required().String()
pointPassword = kingpin.Flag("password", "Point.im password").Short('p').Required().String()
// Secret token passed to the Point Tools client (required).
pointToolsToken = kingpin.Flag("point-tools-token", "Point Tools secret token for crawler").Short('t').Required().String()
// When set, crawling goes on even if the Point Tools response says not to continue.
forceContinue = kingpin.Flag("force-continue", "Keep going even after API rejected new page").Short('f').Bool()
// Post UID to start after; 0 (the default) makes main start from the most recent page.
fromUid = kingpin.Flag("from-uid", "Start after provided post UID").Short('u').Default("0").Int()
// Maximum number of pages to fetch; 0 (the default) disables the limit check in main.
limit = kingpin.Flag("limit", "How many pages to get").Short('c').Default("0").Int()
)
2016-03-07 20:04:50 +00:00
func main() {
kingpin.Parse()
pointClient := point.New(*pointApiUrl)
pointToolsClient := point_tools.New(*pointToolsApiUrl, *pointToolsToken)
2016-03-07 20:04:50 +00:00
_, loginErr := pointClient.Login(*pointLogin, *pointPassword)
2016-03-07 20:04:50 +00:00
if loginErr != nil {
fmt.Printf("Login error %s", loginErr)
2016-03-15 22:21:50 +00:00
return
} else {
fmt.Printf("Successfully authenticated!\n")
}
2016-03-07 20:04:50 +00:00
var page point.Page
var reqErr error = nil
if 0 != *fromUid {
page, reqErr = pointClient.GetNextAllPostsPageBeforeUid(*fromUid)
} else {
page, reqErr = pointClient.GetRecentAllPostsPage()
}
2016-03-07 20:04:50 +00:00
if reqErr != nil {
log.Fatal(reqErr)
2016-03-15 22:21:50 +00:00
return
}
2016-03-07 20:04:50 +00:00
2016-03-15 22:21:50 +00:00
fmt.Printf("1 page requested\n")
2016-03-07 20:04:50 +00:00
if len(page.Posts) > 0 {
fmt.Println("Last uid", strconv.Itoa(page.Posts[len(page.Posts)-1].Uid))
}
2016-03-30 02:02:41 +00:00
sendResp, sendErr := pointToolsClient.SendAllPage(page)
if sendErr != nil {
log.Fatal(sendErr)
}
if point_tools.STATUS_SUCCESS != sendResp.Status {
fmt.Println("Request error", sendResp.Error.Message)
}
if false == sendResp.Data.Continue && false == *forceContinue {
fmt.Println("API rejected next page request")
fmt.Println("Exiting.")
return
}
pageNumber := 1
2016-03-07 20:04:50 +00:00
2016-03-15 22:21:50 +00:00
for page.HasNext {
pageNumber++
2016-03-07 20:04:50 +00:00
if *limit > 0 && pageNumber > *limit {
fmt.Println("Limit reached")
break
}
page, reqErr = pointClient.GetNextAllPostsPage(page)
2016-03-07 20:04:50 +00:00
if reqErr != nil {
log.Fatal(reqErr)
2016-03-15 22:21:50 +00:00
return
2016-03-07 20:04:50 +00:00
}
fmt.Printf("%d page requested", pageNumber)
if len(page.Posts) > 0 {
fmt.Printf(", last uid %d", page.Posts[len(page.Posts)-1].Uid)
}
fmt.Printf(" -> %d posts\n", len(page.Posts))
2016-03-15 22:56:51 +00:00
2016-03-30 02:02:41 +00:00
sendResp, sendErr = pointToolsClient.SendAllPage(page)
2016-03-07 20:04:50 +00:00
if sendErr != nil {
log.Fatal(sendErr)
}
if point_tools.STATUS_SUCCESS != sendResp.Status {
fmt.Println("Request error", sendResp.Error.Message)
}
if false == sendResp.Data.Continue && false == *forceContinue {
fmt.Println("API rejected next page request")
break
}
time.Sleep(time.Second)
2016-03-07 20:04:50 +00:00
}
fmt.Println("Exiting.")
2016-03-07 20:04:50 +00:00
}