diff --git a/cmd/wp-go-static/commands/robots.go b/cmd/wp-go-static/commands/robots.go
new file mode 100644
index 0000000..2b2c1bc
--- /dev/null
+++ b/cmd/wp-go-static/commands/robots.go
@@ -0,0 +1,136 @@
+package commands
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"wp-go-static/internal/config"
+
+	"github.com/spf13/cobra"
+	"github.com/spf13/pflag"
+	"github.com/spf13/viper"
+)
+
+// RobotsCmd fetches a site's robots.txt, optionally rewrites the site's URLs
+// in it, and saves the result to disk.
+var RobotsCmd = &cobra.Command{
+	Use:   "robots",
+	Short: "Create robots from the Wordpress website",
+	RunE:  robotsCmdF,
+}
+
+const (
+	// bindFlagRobotsPrefix is the Viper key prefix for this command's flags;
+	// it matches the "robots" mapstructure tag on config.Config.
+	bindFlagRobotsPrefix = "robots"
+
+	// robotsFetchTimeout bounds the download so that an unresponsive server
+	// cannot hang the command forever.
+	robotsFetchTimeout = 30 * time.Second
+)
+
+func init() {
+	// Define command-line flags
+	flags := RobotsCmd.PersistentFlags()
+	flags.String("dir", "dump", "directory to save downloaded files")
+	flags.String("url", "", "URL to scrape")
+	flags.String("replace-url", "", "Replace with a specific url")
+	flags.String("file", "robots.txt", "Output robots file name")
+
+	// Bind command-line flags to Viper
+	flags.VisitAll(func(f *pflag.Flag) {
+		// BindPFlag only fails for a nil flag, which VisitAll never yields.
+		_ = viper.BindPFlag(bindFlagRobotsPrefix+"."+f.Name, f)
+	})
+
+	RootCmd.AddCommand(RobotsCmd)
+}
+
+// robotsCmdF runs the robots command: it downloads the document at the
+// configured URL, rewrites references to the source site when a replacement
+// URL is set, prints the result, and writes it to <dir>/<file>.
+func robotsCmdF(command *cobra.Command, args []string) error {
+	var cfg config.Config
+	if err := viper.Unmarshal(&cfg); err != nil {
+		return fmt.Errorf("loading configuration: %w", err)
+	}
+	robots := cfg.Robots
+
+	// Validate the URL before touching the network. An empty host would turn
+	// the rewrite below into a replacement of every bare "http://" prefix.
+	source, err := url.Parse(robots.URL)
+	if err != nil {
+		return fmt.Errorf("parsing robots URL %q: %w", robots.URL, err)
+	}
+	if source.Host == "" {
+		return fmt.Errorf("robots URL %q has no host", robots.URL)
+	}
+
+	body, err := fetchRobots(robots.URL)
+	if err != nil {
+		return err
+	}
+
+	if robots.ReplaceURL != "" {
+		body = rewriteRobotsURLs(body, robots.URL, source.Host, robots.ReplaceURL)
+	}
+
+	// Print the output
+	fmt.Println(body)
+
+	// Create the output directory on demand so a fresh checkout works.
+	target := filepath.Join(robots.Dir, robots.File)
+	if err := os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
+		return fmt.Errorf("creating output directory: %w", err)
+	}
+	if err := os.WriteFile(target, []byte(body), 0o666); err != nil {
+		return fmt.Errorf("writing %q: %w", target, err)
+	}
+
+	return nil
+}
+
+// fetchRobots downloads rawURL and returns the response body as a string.
+// Any non-2xx status is an error, so an error page is never saved as
+// robots.txt.
+func fetchRobots(rawURL string) (string, error) {
+	client := &http.Client{Timeout: robotsFetchTimeout}
+	resp, err := client.Get(rawURL)
+	if err != nil {
+		return "", err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
+		return "", fmt.Errorf("fetching %q: unexpected status %s", rawURL, resp.Status)
+	}
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return "", fmt.Errorf("reading %q: %w", rawURL, err)
+	}
+
+	return string(body), nil
+}
+
+// rewriteRobotsURLs replaces rawURL itself and every http/https reference to
+// host, in both plain and backslash-escaped ("\/") form, with replaceURL.
+//
+// A single Replacer pass is used so that already-replaced text is never
+// scanned again; otherwise a replaceURL that starts with the old origin
+// would be rewritten a second time.
+func rewriteRobotsURLs(body, rawURL, host, replaceURL string) string {
+	return strings.NewReplacer(
+		rawURL, replaceURL,
+		"http://"+host, replaceURL,
+		`http:\/\/`+host, replaceURL,
+		"https://"+host, replaceURL,
+		`https:\/\/`+host, replaceURL,
+	).Replace(body)
+}
diff --git a/cmd/wp-go-static/commands/scrape.go b/cmd/wp-go-static/commands/scrape.go
index 96f016a..a0af764 100644
--- a/cmd/wp-go-static/commands/scrape.go
+++ b/cmd/wp-go-static/commands/scrape.go
@@ -160,7 +160,6 @@ func scrapeCmdF(command *cobra.Command, args []string) error {
 	})
 
 	urlsToVisit := []string{
-		"robots.txt",
 		"favicon.ico",
 	}
 
diff --git a/internal/config/config.go b/internal/config/config.go
index 7adb0df..11dedc3 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -3,6 +3,7 @@ package config
 type Config struct {
 	Scrape  ScrapeConfig  `mapstructure:"scrape"`
 	Sitemap SitemapConfig `mapstructure:"sitemap"`
+	Robots  RobotsConfig  `mapstructure:"robots"`
 }
 
 type SitemapConfig struct {
@@ -22,3 +23,10 @@ type ScrapeConfig struct {
 	Images     bool   `mapstructure:"images"`
 	CheckHead  bool   `mapstructure:"check-head"`
 }
+
+type RobotsConfig struct {
+	Dir        string `mapstructure:"dir"`
+	URL        string `mapstructure:"url"`
+	ReplaceURL string `mapstructure:"replace-url"`
+	File       string `mapstructure:"file"`
+}