Files
frankentrac/main.go
2021-04-28 17:01:14 -04:00

109 lines
3.2 KiB
Go

package main
import (
"flag"
"fmt"
"io/ioutil"
"log"
"net/http"
"strings"
md "github.com/JohannesKaufmann/html-to-markdown"
"github.com/PuerkitoBio/goquery"
)
// converter is the shared HTML-to-Markdown converter; processTicket and
// processComments both call its ConvertString method.
// NOTE(review): the arguments are presumably (domain, enableCommonmark,
// options) — confirm against the html-to-markdown documentation.
var converter = md.NewConverter("", true, nil)
// processElement is a goquery Each callback. For an element whose href
// attribute contains the -filter substring it builds the absolute URL from
// the -starturl base, prints it, scrapes it with handleTicket, and records it
// in urls. Elements without an href, or with a non-matching href, are ignored.
func processElement(index int, element *goquery.Selection) {
	link, ok := element.Attr("href")
	if !ok || !strings.Contains(link, *filter) {
		return
	}

	// Concatenating the base URL and the href can leave a doubled slash
	// directly in front of the filter word; collapse it to a single slash.
	ticketURL := strings.ReplaceAll(*url+link, "//"+*filter, "/"+*filter)

	fmt.Println(ticketURL)
	handleTicket(ticketURL)
	urls = append(urls, ticketURL)
}
// handleTicket downloads the page at ticketurl and exports it: every
// "#ticket.trac-content" match is passed to processTicket and every ".ticket"
// match is passed to processComments.
//
// A transport error or an unparsable body terminates the program through
// log.Fatal. A response with a non-2xx status is logged and skipped, so that
// an error page is never parsed and exported as if it were a ticket.
func handleTicket(ticketurl string) {
	response, err := http.Get(ticketurl)
	if err != nil {
		log.Fatal(err)
	}
	defer response.Body.Close()

	// http.Get reports only transport failures through err; 4xx/5xx responses
	// arrive as ordinary responses and must be rejected explicitly.
	if response.StatusCode < 200 || response.StatusCode > 299 {
		log.Printf("skipping %s: unexpected HTTP status %s", ticketurl, response.Status)
		return
	}

	// Create a goquery document from the HTTP response
	document, err := goquery.NewDocumentFromReader(response.Body)
	if err != nil {
		log.Fatal("Error loading HTTP response body. ", err)
	}

	// Export the ticket description first, then its comment history.
	document.Find("#ticket.trac-content").Each(processTicket)
	document.Find(".ticket").Each(processComments)
}
// processComments is a goquery Each callback that exports the "#changelog"
// element found inside element as Markdown. The Markdown is printed to stdout
// and written to a file named
//
//	<component>-<category>-<milestone>-<trac id>comments.md
//
// in the current directory, where milestone and trac id are the text of the
// "a.milestone" and ".trac-id" descendants. (There is no separator between
// the trac id and "comments.md"; the existing file naming is kept as-is.)
//
// A conversion failure terminates the program through log.Fatal. Failures to
// read the HTML or to write the file are logged and the item is skipped.
func processComments(index int, element *goquery.Selection) {
	tracid := element.Find(".trac-id").Text()
	mstone := element.Find("a.milestone").Text()

	tick, err := element.Find("#changelog").Html()
	if err != nil {
		// Previously this error was dropped silently; report it.
		log.Printf("ticket %q: reading changelog HTML: %v", tracid, err)
		return
	}

	markdown, err := converter.ConvertString(tick)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(markdown)

	name := *comp + "-" + *tag + "-" + mstone + "-" + tracid + "comments.md"
	// The write error used to be discarded, which hid missing output files.
	if err := ioutil.WriteFile(name, []byte(markdown), 0644); err != nil {
		log.Printf("writing %q: %v", name, err)
	}
}
// processTicket is a goquery Each callback that exports the inner HTML of
// element as Markdown. The Markdown is printed to stdout and written to a
// file named
//
//	<component>-<category>-<milestone>-<trac id>.md
//
// in the current directory, where milestone and trac id are the text of the
// "a.milestone" and ".trac-id" descendants of element.
//
// A conversion failure terminates the program through log.Fatal. Failures to
// read the HTML or to write the file are logged and the item is skipped.
func processTicket(index int, element *goquery.Selection) {
	tracid := element.Find(".trac-id").Text()
	mstone := element.Find("a.milestone").Text()

	tick, err := element.Html()
	if err != nil {
		// Previously this error was dropped silently; report it.
		log.Printf("ticket %q: reading ticket HTML: %v", tracid, err)
		return
	}

	markdown, err := converter.ConvertString(tick)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(markdown)

	name := *comp + "-" + *tag + "-" + mstone + "-" + tracid + ".md"
	// The write error used to be discarded, which hid missing output files.
	if err := ioutil.WriteFile(name, []byte(markdown), 0644); err != nil {
		log.Printf("writing %q: %v", name, err)
	}
}
// Package-level state: the list of scraped URLs and the command-line flags.
var (
	// urls accumulates every URL that processElement has handed to
	// handleTicket.
	urls []string

	// url is the base address; the query path and each matching href are
	// appended to it.
	url = flag.String("starturl", "https://trac.i2p2.de/", "URL to start scraping at")

	// comp and tag are joined with "%2F" to form the component query
	// parameter, and both appear in the output file names.
	comp = flag.String("component", "router", "Component to collect information about")
	tag  = flag.String("category", "general", "Category of issue to collect information about within the component")

	// status and order are passed through as the query's status and order
	// parameters.
	status = flag.String("status", "!closed", "Status of tickets to collect information about")
	order  = flag.String("order", "priority", "Order to return tickets in")

	// filter is the substring an href must contain to be followed.
	filter = flag.String("filter", "ticket", "Type of item to return")
)
// main parses the command-line flags, fetches the Trac query page selected by
// -status, -component, -category and -order, and runs processElement on every
// anchor of that page.
//
// With the default flags the query URL is:
// https://trac.i2p2.de/query?status=!closed&component=router%2Fgeneral&order=priority
//
// Any failure to fetch or parse the query page is fatal, since there is
// nothing to scrape without it.
func main() {
	flag.Parse()

	// Accept -starturl with or without its trailing slash; without this a
	// value such as "https://host" would yield "https://hostquery?...".
	base := strings.TrimSuffix(*url, "/") + "/"
	queryURL := base + "query?status=" + *status + "&component=" + *comp + "%2F" + *tag + "&order=" + *order

	// Make HTTP request
	response, err := http.Get(queryURL)
	if err != nil {
		log.Fatal(err)
	}
	defer response.Body.Close()

	// http.Get does not turn 4xx/5xx responses into errors; reject them here
	// instead of scraping an error page.
	if response.StatusCode < 200 || response.StatusCode > 299 {
		log.Fatalf("query %s: unexpected HTTP status %s", queryURL, response.Status)
	}

	// Create a goquery document from the HTTP response
	document, err := goquery.NewDocumentFromReader(response.Body)
	if err != nil {
		log.Fatal("Error loading HTTP response body. ", err)
	}

	// Visit every link on the results page; processElement decides which
	// ones to follow.
	document.Find("a").Each(processElement)
}