diff --git a/core/block/bookmark/bookmark_service.go b/core/block/bookmark/bookmark_service.go index e42b52b4cc..fbc2273398 100644 --- a/core/block/bookmark/bookmark_service.go +++ b/core/block/bookmark/bookmark_service.go @@ -3,10 +3,6 @@ package bookmark import ( "context" "fmt" - "io" - "io/ioutil" - "net/http" - "os" "path/filepath" "strings" "sync" @@ -280,7 +276,7 @@ func (s *service) ContentUpdaters(spaceID string, url string, parseBlock bool) ( wg.Add(1) go func() { defer wg.Done() - hash, err := s.loadImage(spaceID, data.Title, data.ImageUrl) + hash, err := s.loadImage(spaceID, getFileNameFromURL(url, "cover"), data.ImageUrl) if err != nil { log.Errorf("load image: %s", err) return @@ -297,7 +293,7 @@ func (s *service) ContentUpdaters(spaceID string, url string, parseBlock bool) ( wg.Add(1) go func() { defer wg.Done() - hash, err := s.loadImage(spaceID, "", data.FaviconUrl) + hash, err := s.loadImage(spaceID, getFileNameFromURL(url, "icon"), data.FaviconUrl) if err != nil { log.Errorf("load favicon: %s", err) return @@ -372,55 +368,34 @@ func (s *service) fetcher(spaceID string, blockID string, params bookmark.FetchP return nil } -func (s *service) loadImage(spaceId string, title, url string) (hash string, err error) { - uploader := s.fileUploaderFactory.NewUploader(spaceId, objectorigin.Bookmark()) - - tempDir := s.tempDirService.TempDir() - ctx, cancel := context.WithTimeout(context.Background(), time.Minute) - defer cancel() +func fileNameAddSuffix(fileName string, suffix string) string { + ext := filepath.Ext(fileName) + fileName = strings.TrimSuffix(fileName, ext) + return fileName + suffix + ext +} - req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) +func getFileNameFromURL(url, filename string) string { + u, err := uri.ParseURI(url) if err != nil { - return + return "" } - resp, err := http.DefaultClient.Do(req) - if err != nil { - return + var urlFileExt string + lastPath := strings.Split(u.Path, "/") + if len(lastPath) > 0 { + 
urlFileExt = filepath.Ext(lastPath[len(lastPath)-1]) } - defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { - return "", fmt.Errorf("download image: %s", resp.Status) - } - - tmpFile, err := ioutil.TempFile(tempDir, "anytype_downloaded_file_*") - if err != nil { - return "", err - } - defer os.Remove(tmpFile.Name()) - - _, err = io.Copy(tmpFile, resp.Body) - if err != nil { - return "", err - } - - _, err = tmpFile.Seek(0, io.SeekStart) - if err != nil { - return "", err - } + source := strings.TrimPrefix(u.Hostname(), "www.") + source = strings.ReplaceAll(source, ".", "_") + return source + "_" + filename + urlFileExt +} - fileName := strings.Split(filepath.Base(url), "?")[0] - if value := resp.Header.Get("Content-Disposition"); value != "" { - contentDisposition := strings.Split(value, "filename=") - if len(contentDisposition) > 1 { - fileName = strings.Trim(contentDisposition[1], "\"") - } +func (s *service) loadImage(spaceId string, title, url string) (hash string, err error) { + uploader := s.fileUploaderFactory.NewUploader(spaceId, objectorigin.Bookmark()) - } + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() - if title != "" { - fileName = title - } - res := uploader.SetName(fileName).SetFile(tmpFile.Name()).SetImageKind(model.ImageKind_AutomaticallyAdded).Upload(ctx) + res := uploader.SetName(title).SetUrl(url).SetImageKind(model.ImageKind_AutomaticallyAdded).Upload(ctx) return res.FileObjectId, res.Err } diff --git a/core/converter/md/md.go b/core/converter/md/md.go index fa69edc01b..a9b3fa56a1 100644 --- a/core/converter/md/md.go +++ b/core/converter/md/md.go @@ -218,13 +218,19 @@ func (h *MD) renderFile(buf writer, in *renderState, b *model.Block) { if file == nil || file.State != model.BlockContentFile_Done { return } - name := escape.MarkdownCharacters(html.EscapeString(file.Name)) + title, filename, ok := h.getLinkInfo(file.TargetObjectId) + if !ok { + filename = h.fn.Get("files", 
file.TargetObjectId, filepath.Base(file.Name), filepath.Ext(file.Name)) + title = filepath.Base(file.Name) + } else { + filename = filepath.Base(filename) + } buf.WriteString(in.indent) if file.Type != model.BlockContentFile_Image { - fmt.Fprintf(buf, "[%s](%s) \n", name, h.fn.Get("files", file.TargetObjectId, filepath.Base(file.Name), filepath.Ext(file.Name))) + fmt.Fprintf(buf, "[%s](%s) \n", title, filename) h.fileHashes = append(h.fileHashes, file.TargetObjectId) } else { - fmt.Fprintf(buf, "![%s](%s) \n", name, h.fn.Get("files", file.TargetObjectId, filepath.Base(file.Name), filepath.Ext(file.Name))) + fmt.Fprintf(buf, "![%s](%s) \n", title, filename) h.imageHashes = append(h.imageHashes, file.TargetObjectId) } } @@ -395,12 +401,29 @@ func (h *MD) getLinkInfo(docId string) (title, filename string, ok bool) { return } title = pbtypes.GetString(info, bundle.RelationKeyName.String()) + // if object is a file + layout := model.ObjectTypeLayout(pbtypes.GetInt64(info, bundle.RelationKeyLayout.String())) + if layout == model.ObjectType_file || layout == model.ObjectType_image || layout == model.ObjectType_audio || layout == model.ObjectType_video { + title = pbtypes.GetString(info, bundle.RelationKeyName.String()) + ext := pbtypes.GetString(info, bundle.RelationKeyFileExt.String()) + if ext != "" { + ext = "." 
+ ext + } + title = strings.TrimSuffix(title, ext) + if title == "" { + title = docId + } + filename = h.fn.Get("files", docId, title, ext) + return + } + if title == "" { title = pbtypes.GetString(info, bundle.RelationKeySnippet.String()) } if title == "" { title = docId } + filename = h.fn.Get("", docId, title, h.Ext()) return } diff --git a/core/files/fileuploader/uploader.go b/core/files/fileuploader/uploader.go index a14dec269b..372439eee5 100644 --- a/core/files/fileuploader/uploader.go +++ b/core/files/fileuploader/uploader.go @@ -273,7 +273,6 @@ func (u *uploader) SetUrl(url string) Uploader { if err != nil { // do nothing } - u.SetName(strings.Split(filepath.Base(url), "?")[0]) u.getReader = func(ctx context.Context) (*fileReader, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) if err != nil { @@ -295,6 +294,9 @@ func (u *uploader) SetUrl(url string) Uploader { fileName = strings.Trim(contentDisposition[1], "\"") } } + if fileName == "" { + fileName = uri.GetFileNameFromURLAndContentType(resp.Request.URL, resp.Header.Get("Content-Type")) + } tmpFile, err := ioutil.TempFile(u.tempDirProvider.TempDir(), "anytype_downloaded_file_*") if err != nil { @@ -333,7 +335,10 @@ func (u *uploader) SetUrl(url string) Uploader { } func (u *uploader) SetFile(path string) Uploader { - u.SetName(filepath.Base(path)) + if u.name == "" { + // only set name if it wasn't explicitly set before + u.SetName(filepath.Base(path)) + } u.setLastModifiedDate(path) u.getReader = func(ctx context.Context) (*fileReader, error) { @@ -421,7 +426,12 @@ func (u *uploader) Upload(ctx context.Context) (result UploadResult) { } if fileName := buf.GetFileName(); fileName != "" { - u.SetName(fileName) + if u.name == "" { + u.SetName(fileName) + } else if filepath.Ext(u.name) == "" { + // enrich current name with extension + u.name = u.name + filepath.Ext(fileName) + } } if u.block != nil { diff --git a/util/uri/uri.go b/util/uri/uri.go index ca80ceae58..a8336750b4 
// preferredExtensions pins the extension for media types where
// mime.ExtensionsByType may return several candidates and its first entry is
// not the one we want. Keys are lowercase media types without parameters.
var preferredExtensions = map[string]string{
	"image/jpeg": ".jpeg",
	"audio/mpeg": ".mp3",
	// Add more preferred mappings if needed
}

// GetFileNameFromURLAndContentType derives a file name for a resource located
// at u and served with the given Content-Type header value.
//
// If the last path segment of u looks like a file name (it contains a dot and
// is not "." or ".."), that segment is returned unchanged. Otherwise the name
// is synthesized as "<host>_<base><ext>", where:
//   - host is u's hostname without a leading "www." and with dots replaced by
//     underscores ("file" when the hostname is empty); the "<host>_" prefix is
//     omitted entirely when u is nil;
//   - base is "image", "audio" or "video" according to the media type's
//     top-level type, and "file" for anything else;
//   - ext comes from preferredExtensions, then mime.ExtensionsByType, and
//     falls back to ".bin" when the media type is unknown or empty.
//
// contentType may carry parameters ("image/jpeg; charset=binary") and any
// letter case; only the lowercase media type itself is considered.
func GetFileNameFromURLAndContentType(u *url.URL, contentType string) string {
	var host string
	if u != nil {
		// URL paths are always slash-separated, so take the last segment by
		// hand instead of relying on the OS-specific path/filepath rules.
		segment := strings.TrimRight(u.Path, "/")
		if i := strings.LastIndex(segment, "/"); i >= 0 {
			segment = segment[i+1:]
		}
		// A plausible file name has a dot in it (this also covers hidden files
		// such as ".htaccess"); "." and ".." are directory references.
		if segment != "." && segment != ".." && strings.Contains(segment, ".") {
			return segment
		}

		// No file name in the URL: fall back to a host-based prefix.
		host = strings.TrimPrefix(u.Hostname(), "www.")
		host = strings.ReplaceAll(host, ".", "_")
		if host == "" {
			host = "file"
		}
	}

	// Strip parameters and normalize case so that header values such as
	// "Audio/MPEG; q=1" hit the same branches as "audio/mpeg".
	mediaType := strings.SplitN(contentType, ";", 2)[0]
	mediaType = strings.ToLower(strings.TrimSpace(mediaType))

	ext, ok := preferredExtensions[mediaType]
	if !ok {
		extensions, err := mime.ExtensionsByType(mediaType)
		if err != nil || len(extensions) == 0 {
			// Unknown or malformed media type.
			ext = ".bin"
		} else {
			ext = extensions[0]
		}
	}

	base := "file"
	switch {
	case strings.HasPrefix(mediaType, "image/"):
		base = "image"
	case strings.HasPrefix(mediaType, "audio/"):
		base = "audio"
	case strings.HasPrefix(mediaType, "video/"):
		base = "video"
	}

	if host == "" {
		return base + ext
	}
	return host + "_" + base + ext
}
diff --git a/util/uri/uri_test.go b/util/uri/uri_test.go index 29fe0ad068..208b3fe527 100644 --- a/util/uri/uri_test.go +++ b/util/uri/uri_test.go @@ -1,6 +1,7 @@ package uri import ( + "net/url" "testing" "github.com/stretchr/testify/assert" @@ -118,3 +119,114 @@ func TestURI_ValidateURI(t *testing.T) { assert.NoError(t, err) }) } + +func TestGetFileNameFromURLWithContentTypeAndMime(t *testing.T) { + mustParseURL := func(s string) *url.URL { + u, err := url.Parse(s) + if err != nil { + t.Fatalf("url.Parse(%q) failed: %v", s, err) + } + return u + } + + tests := []struct { + name string + url *url.URL + contentType string + expected string + }{ + { + name: "URL with explicit filename and extension", + url: mustParseURL("https://example.com/image.jpg"), + contentType: "image/jpeg", + expected: "image.jpg", + }, + { + name: "URL with explicit filename and extension, but wrong content type", + url: mustParseURL("https://example.com/image.jpg"), + contentType: "image/png", + expected: "image.jpg", + }, + { + name: "URL with explicit filename and extension, and empty content type", + url: mustParseURL("https://example.com/image.jpg"), + contentType: "", + expected: "image.jpg", + }, + { + name: "URL with query and fragment, explicit filename", + url: mustParseURL("https://example.com/file.jpeg?query=1#111"), + contentType: "image/jpeg", + expected: "file.jpeg", + }, + { + name: "No filename in URL, fallback to host and image/jpeg", + url: mustParseURL("https://www.example.com/path/to/"), + contentType: "image/jpeg", + // host -> example_com + // image/jpeg typically corresponds to .jpeg or .jpg (mime usually returns .jpeg) + expected: "example_com_image.jpeg", + }, + { + name: "Host-only URL, fallback with image/png", + url: mustParseURL("https://www.example.com"), + contentType: "image/png", + expected: "example_com_image.png", + }, + { + name: "Filename present with video/mp4", + url: mustParseURL("https://www.sub.example.co.uk/folder/video.mp4"), + contentType: 
"video/mp4", + expected: "video.mp4", + }, + { + name: "No extension but filename present", + url: mustParseURL("https://example.com/filename"), + contentType: "image/gif", + expected: "example_com_image.gif", + }, + { + name: "Invalid URL returns empty", + url: nil, + contentType: "image/jpeg", + expected: "image.jpeg", + }, + { + name: "No filename, video/unknown fallback to .bin", + url: mustParseURL("https://www.subdomain.example.com/folder/"), + contentType: "video/unknown", + // no known extension for "video/unknown", fallback .bin + expected: "subdomain_example_com_video.bin", + }, + { + name: "Hidden file as filename", + url: mustParseURL("https://example.com/.htaccess"), + contentType: "text/plain", + expected: ".htaccess", + }, + { + name: "URL with query but no filename extension, fallback audio/mpeg", + url: mustParseURL("https://example.com/path?version=2"), + contentType: "audio/mpeg", + // audio/mpeg known extension: .mp3 + expected: "example_com_audio.mp3", + }, + { + name: "Unknown type entirely", + url: mustParseURL("https://example.net/"), + contentType: "application/x-something-strange", + // no filename, fallback host: example_net + // unknown type -> .bin + expected: "example_net_file.bin", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := GetFileNameFromURLAndContentType(tt.url, tt.contentType) + if got != tt.expected { + t.Errorf("GetFileNameFromURL(%q, %q) = %q; want %q", tt.url, tt.contentType, got, tt.expected) + } + }) + } +}