Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/config-zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
| `scrape.past` | `time.Duration` | 抓取 Feed 的回溯时间窗口。例如 `1h` 表示只抓取过去 1 小时的 Feed。 | `24h` | 否 |
| `scrape.interval` | `time.Duration` | 抓取每个源的频率 (全局默认值)。例如 `1h`。 | `1h` | 否 |
| `scrape.rsshub_endpoint` | `string` | RSSHub 的端点。你可以部署自己的 RSSHub 服务器或使用公共实例 (参见 [RSSHub 文档](https://docs.rsshub.app/guide/instances))。例如 `https://rsshub.app`。 | | 是 (如果使用了 `rsshub_route_path`) |
| `scrape.rsshub_access_key` | `string` | RSSHub 的访问密钥。用于访问控制。(详情见 [RSSHub文档访问控制](https://docs.rsshub.app/deploy/config#access-control-configurations)) | | 否 |
| `scrape.sources` | `对象列表` | 用于抓取 Feed 的源列表。详见下方的 **抓取源配置**。 | `[]` | 是 (至少一个) |

### 抓取源配置 (`scrape.sources[]`)
Expand Down
1 change: 1 addition & 0 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ This section configures parameters related to the Jina AI Reader API, primarily
| `scrape.past` | `time.Duration` | Time window to look back when scraping feeds. E.g., `1h` means only scrape feeds from the past 1 hour. | `24h` | No |
| `scrape.interval` | `time.Duration` | Frequency to scrape each source (global default). E.g., `1h`. | `1h` | No |
| `scrape.rsshub_endpoint` | `string` | Endpoint for RSSHub. You can deploy your own RSSHub server or use a public instance (see [RSSHub Documentation](https://docs.rsshub.app/guide/instances)). E.g., `https://rsshub.app`. | | Yes (if `rsshub_route_path` is used) |
| `scrape.rsshub_access_key` | `string` | The access key for RSSHub. Used for access control. (see [RSSHub config](https://docs.rsshub.app/deploy/config#access-control-configurations))| | No |
| `scrape.sources` | `list of objects` | List of sources to scrape feeds from. See **Scrape Source Configuration** below. | `[]` | Yes (at least one) |

### Scrape Source Configuration (`scrape.sources[]`)
Expand Down
9 changes: 5 additions & 4 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,11 @@ type LLM struct {
}

type Scrape struct {
Past timeutil.Duration `yaml:"past,omitempty" json:"past,omitempty" desc:"The lookback time window for scraping feeds. e.g. 1h means only scrape feeds in the past 1 hour. Default: 3d"`
Interval timeutil.Duration `yaml:"interval,omitempty" json:"interval,omitempty" desc:"How often to scrape each source, it is a global interval. e.g. 1h. Default: 1h"`
RSSHubEndpoint string `yaml:"rsshub_endpoint,omitempty" json:"rsshub_endpoint,omitempty" desc:"The endpoint of the RSSHub. You can deploy your own RSSHub server or use the public one (https://docs.rsshub.app/guide/instances). e.g. https://rsshub.app. It is required when sources[].rss.rsshub_route_path is set."`
Sources []ScrapeSource `yaml:"sources,omitempty" json:"sources,omitempty" desc:"The sources for scraping feeds."`
Past timeutil.Duration `yaml:"past,omitempty" json:"past,omitempty" desc:"The lookback time window for scraping feeds. e.g. 1h means only scrape feeds in the past 1 hour. Default: 3d"`
Interval timeutil.Duration `yaml:"interval,omitempty" json:"interval,omitempty" desc:"How often to scrape each source, it is a global interval. e.g. 1h. Default: 1h"`
RSSHubEndpoint string `yaml:"rsshub_endpoint,omitempty" json:"rsshub_endpoint,omitempty" desc:"The endpoint of the RSSHub. You can deploy your own RSSHub server or use the public one (https://docs.rsshub.app/guide/instances). e.g. https://rsshub.app. It is required when sources[].rss.rsshub_route_path is set."`
RSSHubAccessKey string `yaml:"rsshub_access_key,omitempty" json:"rsshub_access_key,omitempty" desc:"The access key for RSSHub. Used for access control. (see [RSSHub config](https://docs.rsshub.app/deploy/config#access-control-configurations))"`
Sources []ScrapeSource `yaml:"sources,omitempty" json:"sources,omitempty" desc:"The sources for scraping feeds."`
}

type Storage struct {
Expand Down
1 change: 1 addition & 0 deletions pkg/scrape/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ func (c *Config) From(app *config.App) {
URL: app.Scrape.Sources[i].RSS.URL,
RSSHubEndpoint: app.Scrape.RSSHubEndpoint,
RSSHubRoutePath: app.Scrape.Sources[i].RSS.RSSHubRoutePath,
RSSHubAccessKey: app.Scrape.RSSHubAccessKey,
}
}
}
Expand Down
14 changes: 14 additions & 0 deletions pkg/scrape/scraper/rss.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ type ScrapeSourceRSS struct {
URL string
RSSHubEndpoint string
RSSHubRoutePath string
RSSHubAccessKey string
}

func (c *ScrapeSourceRSS) Validate() error {
Expand All @@ -46,9 +47,22 @@ func (c *ScrapeSourceRSS) Validate() error {
return errors.New("URL must be a valid HTTP/HTTPS URL")
}

// Append access key as query parameter if provided
c.appendAccessKey()

return nil
}

func (c *ScrapeSourceRSS) appendAccessKey() {
if c.RSSHubEndpoint != "" && c.RSSHubAccessKey != "" && !strings.Contains(c.URL, "key=") {
if strings.Contains(c.URL, "?") {
c.URL += "&key=" + c.RSSHubAccessKey
} else {
c.URL += "?key=" + c.RSSHubAccessKey
}
}
}

// --- Factory code block ---
func newRSSReader(config *ScrapeSourceRSS) (reader, error) {
if err := config.Validate(); err != nil {
Expand Down
49 changes: 49 additions & 0 deletions pkg/scrape/scraper/rss_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,55 @@ func TestNewRSS(t *testing.T) {
},
},
},
{
Scenario: "Valid Configuration - RSSHub with Access Key",
Given: "a valid configuration with RSSHub details and access key",
When: "creating a new RSS reader",
Then: "should succeed, construct the URL with access key, and return a valid reader",
GivenDetail: givenDetail{
config: &ScrapeSourceRSS{
RSSHubEndpoint: "http://rsshub.app/",
RSSHubRoutePath: "/_/test",
RSSHubAccessKey: "testkey",
},
},
WhenDetail: whenDetail{},
ThenExpected: thenExpected{
wantErr: false,
validateFunc: func(t *testing.T, r reader) {
Expect(r).NotTo(BeNil())
rssReader, ok := r.(*rssReader)
Expect(ok).To(BeTrue())
Expect(rssReader.config.URL).To(Equal("http://rsshub.app/_/test?key=testkey"))
Expect(rssReader.config.RSSHubEndpoint).To(Equal("http://rsshub.app/"))
Expect(rssReader.config.RSSHubRoutePath).To(Equal("/_/test"))
Expect(rssReader.config.RSSHubAccessKey).To(Equal("testkey"))
},
},
},
{
Scenario: "Valid Configuration - URL with Access Key",
Given: "a valid configuration with URL and access key",
When: "creating a new RSS reader",
Then: "should succeed, append access key to URL, and return a valid reader",
GivenDetail: givenDetail{
config: &ScrapeSourceRSS{
URL: "http://example.com/feed",
RSSHubAccessKey: "testkey",
},
},
WhenDetail: whenDetail{},
ThenExpected: thenExpected{
wantErr: false,
validateFunc: func(t *testing.T, r reader) {
Expect(r).NotTo(BeNil())
rssReader, ok := r.(*rssReader)
Expect(ok).To(BeTrue())
Expect(rssReader.config.URL).To(Equal("http://example.com/feed"))
Expect(rssReader.config.RSSHubAccessKey).To(Equal("testkey"))
},
},
},
}

// --- Run tests ---
Expand Down