diff --git a/README.md b/README.md
index 150e6c7..c3444a9 100644
--- a/README.md
+++ b/README.md
@@ -135,6 +135,7 @@ htmltest uses a YAML configuration file. Put `.htmltest.yml` in the same directo
| `DirectoryIndex` | The file to look for when linking to a directory. | `index.html` |
| `FilePath` | Single file to test within `DirectoryPath`, omit to test all. | |
| `FileExtension` | Extension of your HTML documents, includes the dot. If `FilePath` is set we use the extension from that. | `.html` |
+| `BaseURL` | Publication URL of the site, including subfolder if applicable. | |
| `CheckDoctype` | Enables checking the document type declaration. | `true` |
| `CheckAnchors` | Enables checking `<a…>` tags. | `false` |
| `EnforceHTTPS` | Fails when encountering an `http://` link. Useful to prevent mixed content errors when serving over HTTPS. | `false` |
| `IgnoreURLs` | Array of regexs of URLs to ignore. | empty |
diff --git a/htmldoc/document_store.go b/htmldoc/document_store.go
index adf4386..59533c4 100644
--- a/htmldoc/document_store.go
+++ b/htmldoc/document_store.go
@@ -4,9 +4,11 @@
package htmldoc
import (
+ "net/url"
"os"
"path"
"regexp"
+ "strings"
"github.com/wjdp/htmltest/output"
)
@@ -14,6 +16,7 @@ import (
// DocumentStore struct, store of Documents including Document discovery
type DocumentStore struct {
BasePath string // Path, relative to cwd, the site is located in
+ BaseURL *url.URL // Base URL of the site
IgnorePatterns []interface{} // Regexes of directories to ignore
Documents []*Document // All of the documents, used to iterate over
DocumentPathMap map[string]*Document // Maps slash separated paths to documents
@@ -104,7 +107,17 @@ func (dS *DocumentStore) ResolvePath(refPath string) (*Document, bool) {
if refPath[0] == '/' && len(refPath) > 1 {
// Is an absolute link, remove the leading slash for map lookup
- refPath = refPath[1:]
+ if dS.BaseURL == nil {
+ // No base URL, so `/` means our root
+ refPath = refPath[1:]
+ } else {
+ // We have a Base URL, so need to strip off the base path if present
+ refPath = strings.TrimPrefix(refPath, dS.BaseURL.Path)
+
+ // We want to end up with a relative path, so remove leading '/' if present
+ // (This happens if BaseURL does *not* end in '/')
+ refPath = strings.TrimPrefix(refPath, "/")
+ }
}
// Try path as-is, path.ext
diff --git a/htmltest/check-link.go b/htmltest/check-link.go
index aacdaa1..5f20122 100644
--- a/htmltest/check-link.go
+++ b/htmltest/check-link.go
@@ -17,8 +17,9 @@ import (
)
// ignoredRels: List of rel values to ignore, dns-prefetch and preconnect are ignored as they are not links to be
-// followed rather telling browser we want something on that host, if the root of that host is not valid,
-// it's likely not a problem.
+// followed, but rather tell the browser that we want something on that host.
+// If the root of that host is not valid,
+// it's likely not a problem.
var ignoredRels = [...]string{"dns-prefetch", "preconnect"}
func (hT *HTMLTest) checkLink(document *htmldoc.Document, node *html.Node) {
@@ -139,6 +140,26 @@ func (hT *HTMLTest) checkExternal(ref *htmldoc.Reference) {
return
}
+ // Is this an external reference to a local file?
+ if hT.opts.CheckSelfReferencesAsInternal && hT.documentStore.BaseURL != nil {
+
+ if ref.URL.Host == hT.documentStore.BaseURL.Host &&
+ hT.documentStore.BaseURL.User == nil &&
+ strings.HasPrefix(ref.URL.Path, hT.documentStore.BaseURL.Path) {
+ // Convert to internal reference
+ internalURL := *ref.URL
+ internalURL.Scheme = ""
+ internalURL.Host = ""
+
+ internalRef := *ref
+ internalRef.URL = &internalURL
+ internalRef.Path = internalURL.String()
+
+ hT.checkInternal(&internalRef)
+ return
+ }
+ }
+
urlStr := ref.URLString()
// Does this url match an url ignore rule?
diff --git a/htmltest/check-link_test.go b/htmltest/check-link_test.go
index 3322b02..82ce2d5 100644
--- a/htmltest/check-link_test.go
+++ b/htmltest/check-link_test.go
@@ -767,6 +767,53 @@ func TestAnchorBlankHTML4(t *testing.T) {
tExpectIssueCount(t, hT2, 1)
}
+func TestAnchorInternalAbsolute(t *testing.T) {
+ // works for internal absolute links
+ hT := tTestFile("fixtures/links/absoluteLinks.html")
+ tExpectIssueCount(t, hT, 0)
+}
+
+func TestAnchorInternalAbsoluteRootPublication(t *testing.T) {
+ // works for internal absolute links when site is published to root of domain
+ hT := tTestFileOpts("fixtures/links/absoluteLinksRootPublication.html",
+ map[string]interface{}{"BaseURL": "http://example.com"})
+ tExpectIssueCount(t, hT, 0)
+}
+
+func TestAnchorInternalAbsoluteFolderPublication(t *testing.T) {
+ // works for internal absolute links when site is published to a folder
+ hT := tTestFileOpts("fixtures/links/absoluteLinksFolderPublication.html",
+ map[string]interface{}{"BaseURL": "http://www.example.com/blog"})
+ tExpectIssueCount(t, hT, 0)
+}
+
+func TestAnchorInternalBrokenAbsoluteFolderPublication(t *testing.T) {
+ // works for missing internal absolute links when site is published to a folder
+ hT := tTestFileOpts("fixtures/links/absoluteBrokenLinksFolderPublication.html",
+ map[string]interface{}{"BaseURL": "http://www.example.com/blog"})
+ tExpectIssueCount(t, hT, 2)
+}
+
+func TestAnchorExternalLinksToInternalFiles(t *testing.T) {
+ // works for external links that reference internal files
+ hT := tTestFileOpts("fixtures/links/externalLinksToInternalFiles.html",
+ map[string]interface{}{
+ "BaseURL": "http://www.example.com/blog",
+ "CheckSelfReferencesAsInternal": true,
+ })
+ tExpectIssueCount(t, hT, 0)
+}
+
+func TestAnchorExternalLinksToExternalPages(t *testing.T) {
+ // works for external links that don't reference internal files because the subfolder is different
+ hT := tTestFileOpts("fixtures/links/externalLinksToExternalPages.html",
+ map[string]interface{}{
+ "BaseURL": "http://www.example.com/blog",
+ "CheckSelfReferencesAsInternal": true,
+ })
+ tExpectIssueCount(t, hT, 2)
+}
+
func TestSelfSignedLink(t *testing.T) {
tSkipShortExternal(t)
hT := tTestFileOpts("fixtures/links/selfSignedLink.html",
diff --git a/htmltest/fixtures/links/absoluteBrokenLinksFolderPublication.html b/htmltest/fixtures/links/absoluteBrokenLinksFolderPublication.html
new file mode 100644
index 0000000..542bb7f
--- /dev/null
+++ b/htmltest/fixtures/links/absoluteBrokenLinksFolderPublication.html
@@ -0,0 +1,2 @@
+Missing file in root of site
+Missing file in folder of site
\ No newline at end of file
diff --git a/htmltest/fixtures/links/absoluteLinks.html b/htmltest/fixtures/links/absoluteLinks.html
new file mode 100644
index 0000000..bd32420
--- /dev/null
+++ b/htmltest/fixtures/links/absoluteLinks.html
@@ -0,0 +1,2 @@
+Relative to root
+Also relative to root
\ No newline at end of file
diff --git a/htmltest/fixtures/links/absoluteLinksFolderPublication.html b/htmltest/fixtures/links/absoluteLinksFolderPublication.html
new file mode 100644
index 0000000..25aba9e
--- /dev/null
+++ b/htmltest/fixtures/links/absoluteLinksFolderPublication.html
@@ -0,0 +1,2 @@
+Relative to root
+Also relative to root
\ No newline at end of file
diff --git a/htmltest/fixtures/links/absoluteLinksRootPublication.html b/htmltest/fixtures/links/absoluteLinksRootPublication.html
new file mode 100644
index 0000000..bd32420
--- /dev/null
+++ b/htmltest/fixtures/links/absoluteLinksRootPublication.html
@@ -0,0 +1,2 @@
+Relative to root
+Also relative to root
\ No newline at end of file
diff --git a/htmltest/fixtures/links/externalLinksToExternalPages.html b/htmltest/fixtures/links/externalLinksToExternalPages.html
new file mode 100644
index 0000000..b1d90f5
--- /dev/null
+++ b/htmltest/fixtures/links/externalLinksToExternalPages.html
@@ -0,0 +1,2 @@
+Path 'folder' does not match, should not be mapped to internal file
+Path 'issues' does not match, should not map to known file
\ No newline at end of file
diff --git a/htmltest/fixtures/links/externalLinksToInternalFiles.html b/htmltest/fixtures/links/externalLinksToInternalFiles.html
new file mode 100644
index 0000000..abe3ea6
--- /dev/null
+++ b/htmltest/fixtures/links/externalLinksToInternalFiles.html
@@ -0,0 +1,2 @@
+External HTTP reference
+External HTTPS reference
\ No newline at end of file
diff --git a/htmltest/htmltest.go b/htmltest/htmltest.go
index e30bc62..3cec597 100644
--- a/htmltest/htmltest.go
+++ b/htmltest/htmltest.go
@@ -7,6 +7,7 @@ import (
"errors"
"fmt"
"net/http"
+ "net/url"
"os"
"path"
"strings"
@@ -149,6 +150,17 @@ func Test(optsUser map[string]interface{}) (*HTMLTest, error) {
hT.documentStore.DirectoryIndex = hT.opts.DirectoryIndex
hT.documentStore.IgnorePatterns = hT.opts.IgnoreDirs
hT.documentStore.IgnoreTagAttribute = hT.opts.IgnoreTagAttribute
+
+ if hT.opts.BaseURL != "" {
+ baseURL, err := url.Parse(hT.opts.BaseURL)
+ if err != nil {
+ err := fmt.Errorf("Could not parse BaseURL '%s': %w", hT.opts.BaseURL, err)
+ return &hT, err
+ }
+
+ hT.documentStore.BaseURL = baseURL
+ }
+
// Discover documents
hT.documentStore.Discover()
diff --git a/htmltest/options.go b/htmltest/options.go
index 6ed0088..3e5dd3b 100644
--- a/htmltest/options.go
+++ b/htmltest/options.go
@@ -19,6 +19,8 @@ type Options struct {
FilePath string
FileExtension string
+ BaseURL string
+
CheckDoctype bool
CheckAnchors bool
CheckLinks bool
@@ -27,13 +29,14 @@ type Options struct {
CheckMeta bool
CheckGeneric bool
- CheckExternal bool
- CheckInternal bool
- CheckInternalHash bool
- CheckMailto bool
- CheckTel bool
- CheckFavicon bool
- CheckMetaRefresh bool
+ CheckExternal bool
+ CheckInternal bool
+ CheckInternalHash bool
+ CheckMailto bool
+ CheckTel bool
+ CheckFavicon bool
+ CheckMetaRefresh bool
+ CheckSelfReferencesAsInternal bool
EnforceHTML5 bool
EnforceHTTPS bool