109 lines
3.2 KiB
Go
109 lines
3.2 KiB
Go
package main
|
|
|
|
import (
|
|
"flag"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"log"
|
|
"net/http"
|
|
"strings"
|
|
|
|
md "github.com/JohannesKaufmann/html-to-markdown"
|
|
"github.com/PuerkitoBio/goquery"
|
|
)
|
|
|
|
// converter is the shared html-to-markdown converter that processComments and
// processTicket use to turn scraped HTML into Markdown.
var converter = md.NewConverter("", true, nil)
|
|
|
|
// This will get called for each HTML element found
|
|
func processElement(index int, element *goquery.Selection) {
|
|
// See if the href attribute exists on the element
|
|
href, exists := element.Attr("href")
|
|
if exists {
|
|
if strings.Contains(href, *filter) {
|
|
newurl := strings.Replace(*url+href, "//"+*filter, "/"+*filter, -1)
|
|
fmt.Println(newurl)
|
|
handleTicket(newurl)
|
|
urls = append(urls, newurl)
|
|
}
|
|
}
|
|
}
|
|
|
|
func handleTicket(ticketurl string) {
|
|
response, err := http.Get(ticketurl)
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
defer response.Body.Close()
|
|
|
|
// Create a goquery document from the HTTP response
|
|
document, err := goquery.NewDocumentFromReader(response.Body)
|
|
if err != nil {
|
|
log.Fatal("Error loading HTTP response body. ", err)
|
|
}
|
|
// Find all links and process them with the function
|
|
// defined earlier
|
|
document.Find("#ticket.trac-content").Each(processTicket)
|
|
document.Find(".ticket").Each(processComments)
|
|
|
|
}
|
|
|
|
func processComments(index int, element *goquery.Selection) {
|
|
tracid := element.Find(".trac-id").Text()
|
|
mstone := element.Find("a.milestone").Text()
|
|
tick, err := element.Find("#changelog").Html()
|
|
if err == nil {
|
|
markdown, err := converter.ConvertString(tick)
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
// fmt.Println("md ->", markdown)
|
|
fmt.Println(markdown)
|
|
ioutil.WriteFile(*comp+"-"+*tag+"-"+mstone+"-"+tracid+"comments.md", []byte(markdown), 0644)
|
|
}
|
|
}
|
|
|
|
func processTicket(index int, element *goquery.Selection) {
|
|
tracid := element.Find(".trac-id").Text()
|
|
mstone := element.Find("a.milestone").Text()
|
|
tick, err := element.Html()
|
|
if err == nil {
|
|
markdown, err := converter.ConvertString(tick)
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
// fmt.Println("md ->", markdown)
|
|
fmt.Println(markdown)
|
|
ioutil.WriteFile(*comp+"-"+*tag+"-"+mstone+"-"+tracid+".md", []byte(markdown), 0644)
|
|
}
|
|
}
|
|
|
|
// Package-level state: the list of visited ticket URLs and the command-line
// flags that configure the scrape.
var (
	// urls collects every ticket URL that processElement has handled.
	urls []string

	// url is the base address that the query path and ticket hrefs are
	// appended to.
	url = flag.String("starturl", "https://trac.i2p2.de/", "URL to start scraping at")
	// comp and tag together form the "component" query parameter
	// (component, "%2F", category) and prefix the output file names.
	comp = flag.String("component", "router", "Component to collect information about")
	tag  = flag.String("category", "general", "Category of issue to collect information about within the component")
	// status and order are passed through as the "status" and "order" query
	// parameters.
	status = flag.String("status", "!closed", "Status of tickets to collect information about")
	order  = flag.String("order", "priority", "Order to return tickets in")
	// filter is the substring an href must contain for its link to be followed.
	filter = flag.String("filter", "ticket", "Type of item to return")
)
|
|
|
|
//https://trac.i2p2.de/query?status=!closed&component=router%2Fgeneral&order=priority
|
|
func main() {
|
|
flag.Parse()
|
|
// Make HTTP request
|
|
response, err := http.Get(*url + "query?status=" + *status + "&component=" + *comp + "%2F" + *tag + "&order=" + *order)
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
defer response.Body.Close()
|
|
|
|
// Create a goquery document from the HTTP response
|
|
document, err := goquery.NewDocumentFromReader(response.Body)
|
|
if err != nil {
|
|
log.Fatal("Error loading HTTP response body. ", err)
|
|
}
|
|
|
|
// Find all links and process them with the function
|
|
// defined earlier
|
|
document.Find("a").Each(processElement)
|
|
}
|