Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 56 additions & 1 deletion colly_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,17 @@ func newTestServer() *httptest.Server {
`))
})

// Register a /xml endpoint that returns a small, fixed XML document: a <page>
// root containing one <title> and two <paragraph type="description"> children.
// TestCollectorOnXMLWithXML visits this endpoint to exercise OnXML on real XML.
mux.HandleFunc("/xml", func(w http.ResponseWriter, r *http.Request) {
// Label the response as XML.
// NOTE(review): presumably the collector relies on this content type to pick
// its XML (rather than HTML) parsing path — confirm against the collector code.
w.Header().Set("Content-Type", "application/xml")
// The values returned by Write are not checked here.
w.Write([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<page>
<title>Test Page</title>
<paragraph type="description">This is a test page</paragraph>
<paragraph type="description">This is a test paragraph</paragraph>
</page>
`))
})

mux.HandleFunc("/login", func(w http.ResponseWriter, r *http.Request) {
if r.Method == "POST" {
w.Header().Set("Content-Type", "text/html")
Expand Down Expand Up @@ -1038,7 +1049,7 @@ func TestHTMLElement(t *testing.T) {
}
}

func TestCollectorOnXML(t *testing.T) {
func TestCollectorOnXMLWithHtml(t *testing.T) {
ts := newTestServer()
defer ts.Close()

Expand Down Expand Up @@ -1082,6 +1093,50 @@ func TestCollectorOnXML(t *testing.T) {
}
}

// TestCollectorOnXMLWithXML fetches the test server's /xml endpoint and checks
// that OnXML callbacks registered with XPath queries fire for the matching
// nodes, and that XMLElement exposes the expected text (Text), attributes
// (Attr) and child attributes (ChildAttr / ChildAttrs).
func TestCollectorOnXMLWithXML(t *testing.T) {
	ts := newTestServer()
	defer ts.Close()

	c := NewCollector()

	titleCallbackCalled := false
	pageCallbackCalled := false
	paragraphCallbackCount := 0

	c.OnXML("//page/title", func(e *XMLElement) {
		titleCallbackCalled = true
		if e.Text != "Test Page" {
			t.Error("Title element text does not match, got", e.Text)
		}
	})

	c.OnXML("//page/paragraph", func(e *XMLElement) {
		paragraphCallbackCount++
		if e.Attr("type") != "description" {
			t.Error("Failed to get paragraph's type attribute")
		}
	})

	c.OnXML("/page", func(e *XMLElement) {
		// Record the invocation so the assertions below cannot pass
		// vacuously when the query matches nothing.
		pageCallbackCalled = true
		if e.ChildAttr("paragraph", "type") != "description" {
			t.Error("Invalid type value")
		}
		types := e.ChildAttrs("paragraph", "type")
		if len(types) != 2 {
			t.Error("Invalid type values")
		}
	})

	// A failed request would otherwise only show up as confusing
	// "callback not called" errors further down, so stop here instead.
	if err := c.Visit(ts.URL + "/xml"); err != nil {
		t.Fatal("Failed to visit /xml:", err)
	}

	if !titleCallbackCalled {
		t.Error("Failed to call OnXML callback for <title> tag")
	}

	if !pageCallbackCalled {
		t.Error("Failed to call OnXML callback for <page> tag")
	}

	if paragraphCallbackCount != 2 {
		t.Error("Failed to find all <paragraph> tags")
	}
}

func TestCollectorVisitWithTrace(t *testing.T) {
ts := newTestServer()
defer ts.Close()
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ require (
github.com/PuerkitoBio/goquery v1.5.1
github.com/andybalholm/cascadia v1.2.0 // indirect
github.com/antchfx/htmlquery v1.2.3
github.com/antchfx/xmlquery v1.3.3
github.com/antchfx/xmlquery v1.3.4
github.com/gobwas/glob v0.2.3
github.com/golang/protobuf v1.4.2 // indirect
github.com/jawher/mow.cli v1.1.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5
github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
github.com/antchfx/htmlquery v1.2.3 h1:sP3NFDneHx2stfNXCKbhHFo8XgNjCACnU/4AO5gWz6M=
github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0=
github.com/antchfx/xmlquery v1.3.3 h1:HYmadPG0uz8CySdL68rB4DCLKXz2PurCjS3mnkVF4CQ=
github.com/antchfx/xmlquery v1.3.3/go.mod h1:64w0Xesg2sTaawIdNqMB+7qaW/bSqkQm+ssPaCMWNnc=
github.com/antchfx/xmlquery v1.3.4 h1:RuhsI4AA5Ma4XoXhaAr2VjJxU0Xp0W2zy/f9ZIpsF4s=
github.com/antchfx/xmlquery v1.3.4/go.mod h1:64w0Xesg2sTaawIdNqMB+7qaW/bSqkQm+ssPaCMWNnc=
github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
github.com/antchfx/xpath v1.1.10 h1:cJ0pOvEdN/WvYXxvRrzQH9x5QWKpzHacYO8qzCcDYAg=
github.com/antchfx/xpath v1.1.10/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
Expand Down
3 changes: 1 addition & 2 deletions xmlelement.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
package colly

import (
"encoding/xml"
"strings"

"github.com/antchfx/htmlquery"
Expand Down Expand Up @@ -76,7 +75,7 @@ func (h *XMLElement) Attr(k string) string {
}
}
} else {
for _, a := range h.attributes.([]xml.Attr) {
for _, a := range h.attributes.([]xmlquery.Attr) {
if a.Name.Local == k {
return a.Value
}
Expand Down