Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GO-4753 rework file names detection when downloading files #1962

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 23 additions & 48 deletions core/block/bookmark/bookmark_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,6 @@
import (
"context"
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
"path/filepath"
"strings"
"sync"
Expand Down Expand Up @@ -280,7 +276,7 @@
wg.Add(1)
go func() {
defer wg.Done()
hash, err := s.loadImage(spaceID, data.Title, data.ImageUrl)
hash, err := s.loadImage(spaceID, getFileNameFromURL(url, "cover"), data.ImageUrl)
if err != nil {
log.Errorf("load image: %s", err)
return
Expand All @@ -297,7 +293,7 @@
wg.Add(1)
go func() {
defer wg.Done()
hash, err := s.loadImage(spaceID, "", data.FaviconUrl)
hash, err := s.loadImage(spaceID, getFileNameFromURL(url, "icon"), data.FaviconUrl)
if err != nil {
log.Errorf("load favicon: %s", err)
return
Expand Down Expand Up @@ -372,55 +368,34 @@
return nil
}

func (s *service) loadImage(spaceId string, title, url string) (hash string, err error) {
uploader := s.fileUploaderFactory.NewUploader(spaceId, objectorigin.Bookmark())

tempDir := s.tempDirService.TempDir()
ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
defer cancel()
func fileNameAddSuffix(fileName string, suffix string) string {

Check failure on line 371 in core/block/bookmark/bookmark_service.go

View workflow job for this annotation

GitHub Actions / lint

func `fileNameAddSuffix` is unused (unused)
ext := filepath.Ext(fileName)
fileName = strings.TrimSuffix(fileName, ext)
return fileName + suffix + ext
}

req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
func getFileNameFromURL(url, filename string) string {
u, err := uri.ParseURI(url)
if err != nil {
return
return ""
}
resp, err := http.DefaultClient.Do(req)
if err != nil {
return
var urlFileExt string
lastPath := strings.Split(u.Path, "/")
if len(lastPath) > 0 {
urlFileExt = filepath.Ext(lastPath[len(lastPath)-1])
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("download image: %s", resp.Status)
}

tmpFile, err := ioutil.TempFile(tempDir, "anytype_downloaded_file_*")
if err != nil {
return "", err
}
defer os.Remove(tmpFile.Name())

_, err = io.Copy(tmpFile, resp.Body)
if err != nil {
return "", err
}

_, err = tmpFile.Seek(0, io.SeekStart)
if err != nil {
return "", err
}
source := strings.TrimPrefix(u.Hostname(), "www.")
source = strings.ReplaceAll(source, ".", "_")
return source + "_" + filename + urlFileExt
}

fileName := strings.Split(filepath.Base(url), "?")[0]
if value := resp.Header.Get("Content-Disposition"); value != "" {
contentDisposition := strings.Split(value, "filename=")
if len(contentDisposition) > 1 {
fileName = strings.Trim(contentDisposition[1], "\"")
}
func (s *service) loadImage(spaceId string, title, url string) (hash string, err error) {
uploader := s.fileUploaderFactory.NewUploader(spaceId, objectorigin.Bookmark())

}
ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
defer cancel()

if title != "" {
fileName = title
}
res := uploader.SetName(fileName).SetFile(tmpFile.Name()).SetImageKind(model.ImageKind_AutomaticallyAdded).Upload(ctx)
res := uploader.SetName(title).SetUrl(url).SetImageKind(model.ImageKind_AutomaticallyAdded).Upload(ctx)
return res.FileObjectId, res.Err
}
29 changes: 26 additions & 3 deletions core/converter/md/md.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,13 +218,19 @@
if file == nil || file.State != model.BlockContentFile_Done {
return
}
name := escape.MarkdownCharacters(html.EscapeString(file.Name))
title, filename, ok := h.getLinkInfo(file.TargetObjectId)
if !ok {
filename = h.fn.Get("files", file.TargetObjectId, filepath.Base(file.Name), filepath.Ext(file.Name))
title = filepath.Base(file.Name)
} else {
filename = filepath.Base(filename)
}
buf.WriteString(in.indent)
if file.Type != model.BlockContentFile_Image {
fmt.Fprintf(buf, "[%s](%s) \n", name, h.fn.Get("files", file.TargetObjectId, filepath.Base(file.Name), filepath.Ext(file.Name)))
fmt.Fprintf(buf, "[%s](%s) \n", title, filename)
h.fileHashes = append(h.fileHashes, file.TargetObjectId)
} else {
fmt.Fprintf(buf, "![%s](%s) \n", name, h.fn.Get("files", file.TargetObjectId, filepath.Base(file.Name), filepath.Ext(file.Name)))
fmt.Fprintf(buf, "![%s](%s) \n", title, filename)
h.imageHashes = append(h.imageHashes, file.TargetObjectId)
}
}
Expand Down Expand Up @@ -395,12 +401,29 @@
return
}
title = pbtypes.GetString(info, bundle.RelationKeyName.String())
// if object is a file
layout := model.ObjectTypeLayout(pbtypes.GetInt64(info, bundle.RelationKeyLayout.String()))

Check failure on line 405 in core/converter/md/md.go

View workflow job for this annotation

GitHub Actions / lint

G115: integer overflow conversion int64 -> int32 (gosec)
if layout == model.ObjectType_file || layout == model.ObjectType_image || layout == model.ObjectType_audio || layout == model.ObjectType_video {
title = pbtypes.GetString(info, bundle.RelationKeyName.String())
ext := pbtypes.GetString(info, bundle.RelationKeyFileExt.String())
if ext != "" {
ext = "." + ext
}
title = strings.TrimSuffix(title, ext)
if title == "" {
title = docId
}
filename = h.fn.Get("files", docId, title, ext)
return
}

if title == "" {
title = pbtypes.GetString(info, bundle.RelationKeySnippet.String())
}
if title == "" {
title = docId
}

filename = h.fn.Get("", docId, title, h.Ext())
return
}
Expand Down
16 changes: 13 additions & 3 deletions core/files/fileuploader/uploader.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,6 @@
if err != nil {
// do nothing
}
u.SetName(strings.Split(filepath.Base(url), "?")[0])
u.getReader = func(ctx context.Context) (*fileReader, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
Expand All @@ -295,6 +294,9 @@
fileName = strings.Trim(contentDisposition[1], "\"")
}
}
if fileName == "" {
fileName = uri.GetFileNameFromURLAndContentType(resp.Request.URL, resp.Header.Get("Content-Type"))
}

tmpFile, err := ioutil.TempFile(u.tempDirProvider.TempDir(), "anytype_downloaded_file_*")
if err != nil {
Expand Down Expand Up @@ -333,7 +335,10 @@
}

func (u *uploader) SetFile(path string) Uploader {
u.SetName(filepath.Base(path))
if u.name == "" {
// only set name if it wasn't explicitly set before
u.SetName(filepath.Base(path))
}
u.setLastModifiedDate(path)

u.getReader = func(ctx context.Context) (*fileReader, error) {
Expand Down Expand Up @@ -421,7 +426,12 @@
}

if fileName := buf.GetFileName(); fileName != "" {
u.SetName(fileName)
if u.name == "" {
u.SetName(fileName)
} else if filepath.Ext(u.name) == "" {
// enrich current name with extension
u.name = u.name + filepath.Ext(fileName)

Check failure on line 433 in core/files/fileuploader/uploader.go

View workflow job for this annotation

GitHub Actions / lint

assignOp: replace `u.name = u.name + filepath.Ext(fileName)` with `u.name += filepath.Ext(fileName)` (gocritic)
}
}

if u.block != nil {
Expand Down
67 changes: 67 additions & 0 deletions util/uri/uri.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ package uri

import (
"fmt"
"mime"
"net/url"
"os"
"path/filepath"
"regexp"
"strings"
)
Expand Down Expand Up @@ -84,3 +86,68 @@ func NormalizeAndParseURI(uri string) (*url.URL, error) {

return url.Parse(normalizeURI(uri))
}

// preferredExtensions pins the extension to use for media types where the
// first result of mime.ExtensionsByType is not the one we want to show.
// Keys are lower-case media types without parameters.
var preferredExtensions = map[string]string{
	"image/jpeg": ".jpeg",
	"audio/mpeg": ".mp3",
	// Add more preferred mappings if needed
}

// GetFileNameFromURLAndContentType derives a file name for downloaded content.
//
// If the last path segment of u looks like a file name (it contains a dot and
// is not "." or ".."), that segment is returned unchanged. Otherwise a name of
// the form "<host>_<base><ext>" is synthesized, where:
//   - host is u's hostname without a leading "www." and with dots replaced by
//     underscores ("file" when the hostname is empty; omitted together with
//     the underscore when u is nil);
//   - base is "image", "audio" or "video" according to the media type, or
//     "file" for anything else;
//   - ext comes from preferredExtensions, then mime.ExtensionsByType, and
//     falls back to ".bin" when the media type is unknown.
//
// contentType may be a raw Content-Type header value: parameters such as
// "; charset=utf-8" and letter case are ignored when matching.
func GetFileNameFromURLAndContentType(u *url.URL, contentType string) string {
	var host string
	if u != nil {
		// filepath.Base returns "." for an empty path and "/" for a root path;
		// neither contains a usable name. ".." is a directory reference, not a file.
		name := filepath.Base(u.Path)
		if name != "." && name != ".." && strings.Contains(name, ".") {
			// A plausible filename was found directly in the URL.
			return name
		}

		// No filename in the path, fall back to a host-based prefix.
		host = strings.ReplaceAll(strings.TrimPrefix(u.Hostname(), "www."), ".", "_")
		if host == "" {
			host = "file"
		}
	}

	// Strip parameters and normalize case so that header values like
	// "Image/JPEG; charset=binary" still hit the preferred mapping and the
	// prefix checks below.
	mediaType, _, err := mime.ParseMediaType(contentType)
	if err != nil {
		// Best effort for malformed headers: keep only the part before ';'.
		mediaType = strings.ToLower(strings.TrimSpace(strings.SplitN(contentType, ";", 2)[0]))
	}

	ext, ok := preferredExtensions[mediaType]
	if !ok {
		// Fallback if no known extension.
		ext = ".bin"
		if exts, err := mime.ExtensionsByType(mediaType); err == nil && len(exts) > 0 {
			ext = exts[0]
		}
	}

	// Determine a base name from the media type family.
	base := "file"
	switch {
	case strings.HasPrefix(mediaType, "image/"):
		base = "image"
	case strings.HasPrefix(mediaType, "audio/"):
		base = "audio"
	case strings.HasPrefix(mediaType, "video/"):
		base = "video"
	}

	if host == "" {
		return base + ext
	}
	return host + "_" + base + ext
}
Loading
Loading