mirror of
https://bitbucket.org/skobkin/point-tools-crawler.git
synced 2024-12-05 02:25:53 +00:00
Crawler now supports 2 new flags: -u (UID to start loading pages from) and -c (number of pages to get).
This commit is contained in:
parent
d791b2377c
commit
df54db176d
|
@ -87,20 +87,16 @@ func (c *PointClient) GetRecentAllPostsPage() (Page, error) {
|
|||
return page, nil
|
||||
}
|
||||
|
||||
func (c *PointClient) GetNextAllPostsPage(page Page) (Page, error) {
|
||||
func (c *PointClient) GetNextAllPostsPageBeforeUid(uid int) (Page, error) {
|
||||
var nextPage Page
|
||||
|
||||
if false == page.HasNext || len(page.Posts) == 0 {
|
||||
return nextPage, errors.New("Page must have has_next=true and some posts")
|
||||
}
|
||||
|
||||
if len(c.token.AuthToken) == 0 {
|
||||
log.Fatal("Can not get recent posts. Login first.")
|
||||
return nextPage, errors.New("Login first")
|
||||
}
|
||||
|
||||
data := url.Values{}
|
||||
data.Set("before", strconv.Itoa(page.Posts[len(page.Posts)-1].Uid))
|
||||
data.Set("before", strconv.Itoa(uid))
|
||||
|
||||
headers := map[string]string{
|
||||
"Authorization": c.token.AuthToken,
|
||||
|
@ -127,3 +123,20 @@ func (c *PointClient) GetNextAllPostsPage(page Page) (Page, error) {
|
|||
|
||||
return nextPage, nil
|
||||
}
|
||||
|
||||
func (c *PointClient) GetNextAllPostsPage(page Page) (Page, error) {
|
||||
var nextPage Page
|
||||
|
||||
if len(page.Posts) == 0 {
|
||||
return nextPage, errors.New("Page must have has_next=true and some posts")
|
||||
}
|
||||
|
||||
if len(c.token.AuthToken) == 0 {
|
||||
log.Fatal("Can not get recent posts. Login first.")
|
||||
return nextPage, errors.New("Login first")
|
||||
}
|
||||
|
||||
nextPage, err := c.GetNextAllPostsPageBeforeUid(page.Posts[len(page.Posts)-1].Uid)
|
||||
|
||||
return nextPage, err
|
||||
}
|
||||
|
|
|
@ -14,13 +14,17 @@ func main() {
|
|||
var pointApiUrl, pointToolsApiUrl string;
|
||||
var pointLogin, pointPassword, pointToolsToken string
|
||||
var forceContinue bool
|
||||
var fromUid, limit int
|
||||
|
||||
// Todo refactor the CLI options to new library
|
||||
flag.StringVar(&pointLogin, "l", "", "Account login")
|
||||
flag.StringVar(&pointPassword, "p", "", "Account password")
|
||||
flag.StringVar(&pointToolsToken, "t", "", "Point Tools crawler API token")
|
||||
flag.StringVar(&pointApiUrl, "s", "https://point.im/api/", "Point.im API url")
|
||||
flag.StringVar(&pointToolsApiUrl, "g", "https://point.skobk.in/api/crawler/", "Point Tools API url")
|
||||
flag.BoolVar(&forceContinue, "f", false, "Force continue reading /all despite of server refusal")
|
||||
flag.IntVar(&fromUid, "u", 0, "From which UID start to load pages")
|
||||
flag.IntVar(&limit, "c", 0, "How many pages to get")
|
||||
flag.Parse()
|
||||
|
||||
if len(pointLogin) < 1 || len(pointPassword) < 1 {
|
||||
|
@ -45,7 +49,14 @@ func main() {
|
|||
fmt.Printf("Successfully authenticated!\n")
|
||||
}
|
||||
|
||||
page, reqErr := pointClient.GetRecentAllPostsPage()
|
||||
var page point.Page
|
||||
var reqErr error = nil
|
||||
|
||||
if 0 != fromUid {
|
||||
page, reqErr = pointClient.GetNextAllPostsPageBeforeUid(fromUid)
|
||||
} else {
|
||||
page, reqErr = pointClient.GetRecentAllPostsPage()
|
||||
}
|
||||
|
||||
if reqErr != nil {
|
||||
log.Fatal(reqErr)
|
||||
|
@ -79,6 +90,11 @@ func main() {
|
|||
for page.HasNext {
|
||||
pageNumber++
|
||||
|
||||
if pageNumber > limit {
|
||||
fmt.Println("Limit reached")
|
||||
break
|
||||
}
|
||||
|
||||
page, reqErr = pointClient.GetNextAllPostsPage(page)
|
||||
|
||||
if reqErr != nil {
|
||||
|
|
Loading…
Reference in a new issue