Skip to content

Commit

Permalink
Add --filter-contents
Browse files Browse the repository at this point in the history
  • Loading branch information
atykhyy committed Jun 17, 2018
1 parent e200cec commit 89db1d9
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 8 deletions.
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,27 @@ name the -B and -T options allow a mapping file to be specified to
rename branches and tags (respectively). The syntax of the mapping
file is the same as for the author mapping.

Content filtering
-----------------

hg-fast-export supports filtering the content of exported files.
The filter command is supplied to the --filter-contents option. hg-fast-export
runs the filter once for each exported file, appending three arguments to the
command: the file's path, its Mercurial hash, and a binary flag. It pipes the
file's content to the filter's standard input, and uses the filter's standard
output in place of the file's original content. The prototypical use of this
feature is to convert line endings in text files from CRLF to git's preferred LF:

```
-- Start of crlf-filter.sh --
#!/bin/sh
# Example content filter for hg-fast-export's --filter-contents option.
# The file's content arrives on standard input; whatever is written to
# standard output replaces it in the export.
# $1 = pathname of exported file relative to the root of the repo
# $2 = Mercurial's hash of the file
# $3 = "1" if Mercurial reports the file as binary, otherwise "0"
# Binary files are passed through untouched; everything else goes through
# dos2unix. The comparison uses a single "=" because "==" is a bash
# extension that strict /bin/sh implementations (e.g. dash) reject.
if [ "$3" = "1" ]; then cat; else dos2unix; fi
-- End of crlf-filter.sh --
```

Notes/Limitations
-----------------

Expand Down
36 changes: 28 additions & 8 deletions hg-fast-export.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,19 +123,32 @@ def get_author(logmessage,committer,authors):
return r
return committer

def export_file_contents(ctx,manifest,files,hgtags,encoding=''):
def export_file_contents(ctx,manifest,files,hgtags,encoding='',filter_contents=None):
count=0
max=len(files)
for file in files:
# Skip .hgtags files. They only get us in trouble.
if not hgtags and file == ".hgtags":
sys.stderr.write('Skip %s\n' % (file))
continue
d=ctx.filectx(file).data()
if encoding:
filename=file.decode(encoding).encode('utf8')
else:
filename=file
file_ctx=ctx.filectx(file)
d=file_ctx.data()
if filter_contents:
import subprocess
filter_cmd=filter_contents + [filename,node.hex(file_ctx.filenode()),'1' if file_ctx.isbinary() else '0']
try:
filter_proc=subprocess.Popen(filter_cmd,stdin=subprocess.PIPE,stdout=subprocess.PIPE)
d,_=filter_proc.communicate(d)
except:
sys.stderr.write('Running filter-contents %s:\n' % filter_cmd)
raise
filter_ret=filter_proc.poll()
if filter_ret:
raise subprocess.CalledProcessError(filter_ret,filter_cmd)
wr('M %s inline %s' % (gitmode(manifest.flags(file)),
strip_leading_slash(filename)))
wr('data %d' % len(d)) # had some trouble with size()
Expand Down Expand Up @@ -185,7 +198,7 @@ def strip_leading_slash(filename):
return filename

def export_commit(ui,repo,revision,old_marks,max,count,authors,
branchesmap,sob,brmap,hgtags,encoding='',fn_encoding=''):
branchesmap,sob,brmap,hgtags,encoding='',fn_encoding='',filter_contents=None):
def get_branchname(name):
if brmap.has_key(name):
return brmap[name]
Expand Down Expand Up @@ -246,8 +259,8 @@ def get_branchname(name):
removed=[strip_leading_slash(x) for x in removed]

map(lambda r: wr('D %s' % r),removed)
export_file_contents(ctx,man,added,hgtags,fn_encoding)
export_file_contents(ctx,man,changed,hgtags,fn_encoding)
export_file_contents(ctx,man,added,hgtags,fn_encoding,filter_contents)
export_file_contents(ctx,man,changed,hgtags,fn_encoding,filter_contents)
wr()

return checkpoint(count)
Expand Down Expand Up @@ -383,7 +396,7 @@ def verify_heads(ui,repo,cache,force,branchesmap):

def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,
authors={},branchesmap={},tagsmap={},
sob=False,force=False,hgtags=False,notes=False,encoding='',fn_encoding=''):
sob=False,force=False,hgtags=False,notes=False,encoding='',fn_encoding='',filter_contents=None):
def check_cache(filename, contents):
if len(contents) == 0:
sys.stderr.write('Warning: %s does not contain any data, this will probably make an incremental import fail\n' % filename)
Expand Down Expand Up @@ -425,7 +438,7 @@ def check_cache(filename, contents):
brmap={}
for rev in range(min,max):
c=export_commit(ui,repo,rev,old_marks,max,c,authors,branchesmap,
sob,brmap,hgtags,encoding,fn_encoding)
sob,brmap,hgtags,encoding,fn_encoding,filter_contents)
if notes:
for rev in range(min,max):
c=export_note(ui,repo,rev,c,authors, encoding, rev == min and min != 0)
Expand Down Expand Up @@ -485,6 +498,8 @@ def bail(parser,opt):
help="Assume file names from Mercurial are encoded in <filename_encoding>")
parser.add_option("--mappings-are-raw",dest="raw_mappings", default=False,
help="Assume mappings are raw <key>=<value> lines")
parser.add_option("--filter-contents",dest="filter_contents",
help="Pipe contents of each exported file through FILTER_CONTENTS <file-path> <hg-hash> <is-binary>")

(options,args)=parser.parse_args()

Expand Down Expand Up @@ -523,8 +538,13 @@ def bail(parser,opt):
if options.fn_encoding!=None:
fn_encoding=options.fn_encoding

filter_contents=None
if options.filter_contents!=None:
import shlex
filter_contents=shlex.split(options.filter_contents)

sys.exit(hg2git(options.repourl,m,options.marksfile,options.mappingfile,
options.headsfile, options.statusfile,
authors=a,branchesmap=b,tagsmap=t,
sob=options.sob,force=options.force,hgtags=options.hgtags,
notes=options.notes,encoding=encoding,fn_encoding=fn_encoding))
notes=options.notes,encoding=encoding,fn_encoding=fn_encoding,filter_contents=filter_contents))
2 changes: 2 additions & 0 deletions hg-fast-export.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ Options:
--fe <filename_encoding> Assume filenames from Mercurial are encoded
in <filename_encoding>
--mappings-are-raw Assume mappings are raw <key>=<value> lines
--filter-contents <cmd> Pipe contents of each exported file through <cmd>
with <file-path> <hg-hash> <is-binary> as arguments
"
case "$1" in
-h|--help)
Expand Down

0 comments on commit 89db1d9

Please sign in to comment.