#!/usr/local/bin/python
#
# script for manual insertion of downloaded files from repository into working copy
# useful for importing large files that are impossible to update without resume
#
# it uses svn command line to get information about file from repository
#
# WARNING: you need to be sure that the file you are importing is the latest one
#          at the moment of operation - the script is unable to verify that the
#          supplied file corresponds to the latest version, for which the
#          information is fetched from the repository

# legend: * feature, - not in priority
#
# * check if file exists
# * read .svn/entries file to check if it has supported format
# * get and parse file info from remote repository
# * compare md5 (build md5 and fetch md5)
# * look if file entry is already there (if not - insert, if yes - replace)
#
# techtonik // php.net 2006-12-17

import datetime
import difflib
import logging
import md5
import os
import stat
import subprocess
import sys
from shutil import copyfile
from xml.dom.minidom import parseString


# Log at INFO level with timestamps such as "17Dec06 21:05:33".
# For debug output change the level below to logging.DEBUG.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)-8s %(message)s',
    datefmt='%d%b%y %H:%M:%S'
)


# The name of the file to import is the only (required) command line argument;
# without it show a short usage note and stop.
if len(sys.argv) < 2:
    usage_lines = [
        ' '.join(['usage: ', sys.argv[0], 'filename']),
        '',
        'remember to backup your repository path',
    ]
    sys.stdout.write('\n'.join(usage_lines) + '\n')
    sys.exit()

filename = sys.argv[1]
logging.debug("args %r", sys.argv)
logging.info("filename %s", filename)


# Stop early unless both the file to import and the working copy metadata
# exist as regular files (checked in this order).
for required_path, complaint in (
        (filename, 'file "' + filename + '" does not exist or is not a file'),
        (".svn/entries", 'not a working copy - could not find .svn/entries')):
    if not os.path.isfile(required_path):
        sys.exit(complaint)
logging.info("filename exists in working copy")


# Read .svn/entries into `entstrings` (a list of raw lines) and make sure the
# format version on its first line is one this script knows how to edit.
# Exits with a message when the file cannot be opened, is empty, does not
# start with a number, or has an unsupported format version.
supportedformat = [8]
try:
    entfile = open(".svn/entries", 'rb')
except IOError:
    sys.exit('.svn/entries could not be read')
try:
    entstrings = entfile.readlines()
finally:
    # close the handle even if reading fails
    entfile.close()

if not entstrings:
    sys.exit('.svn/entries is empty')

try:
    formatversion = int(entstrings[0])
except ValueError:
    # a first line that is not a plain integer means a layout we cannot parse
    sys.exit('.svn/entries does not start with a format version number')

if formatversion not in supportedformat:
    sys.exit('format version ' + entstrings[0].strip() + ' of .svn/entries is not supported')


# Group the raw lines of .svn/entries into a list of entries, each entry being
# a list of its lines without line endings. A line that starts with a form
# feed (\x0C) closes the entry collected so far.
entries = []
pending = []
for line in entstrings:
    pending.append(line)
    if line.startswith('\x0C'):
        entries.append("".join(pending).splitlines())
        pending = []


# Query the repository for information about the file.
# The remote path of the working copy directory is taken from the fifth line
# of the first entry; the file name is appended to it.
svndirpath = entries[0][4].strip() + "/" + filename

# Pass the arguments as a list so that no shell is involved: a file name with
# spaces or shell metacharacters can neither break nor alter the command.
try:
    svnproc = subprocess.Popen(["svn", "info", "--xml", svndirpath],
                               stdout=subprocess.PIPE)
except OSError:
    sys.exit('svn command line client could not be started')
ech = svnproc.communicate()[0]

if len(ech) == 0:
    sys.exit('svn info returned zero response - as if there is no such file in repository')
logging.debug("svn info output\n %s" % ech)
# parsed XML document, used later to read date, revision and author
echxml = parseString(ech)


# First ingredient of the new .svn/entries record: the MD5 checksum of the
# local file, computed over 4096-byte chunks so the whole file never has to
# be held in memory.
try:
    insfile = open(filename, 'rb')
except IOError:
    sys.exit('file "' + filename + '" could not be read')

filemd5 = md5.new()
# read() returns an empty string at end of file, which ends the iteration
for data in iter(lambda: insfile.read(4096), ''):
    filemd5.update(data)
insfile.close()

localmd5 = filemd5.hexdigest()
logging.debug("local file md5 %s", localmd5)

# Modification time of the local file, converted to UTC and formatted as
# YYYY-MM-DDTHH:MM:SS.ffffffZ
moddt = datetime.datetime.utcfromtimestamp(os.path.getmtime(filename))
seconds_part = moddt.strftime("%Y-%m-%dT%H:%M:%S")
fraction_part = ".%06dZ" % moddt.microsecond
modtime = seconds_part + fraction_part
logging.debug("local file modification time %s", modtime)

# Extract last changed date, revision and author from the parsed svn info
# output. A missing or empty element ends the script with a readable message
# instead of an IndexError traceback.
def _first_element(tagname):
    """Return the first <tagname> element of echxml; exit if there is none."""
    found = echxml.getElementsByTagName(tagname)
    if not found:
        sys.exit('svn info output contains no <' + tagname + '> element')
    return found[0]


def _element_text(tagname):
    """Return the value of the first child node of the first <tagname> element."""
    children = _first_element(tagname).childNodes
    if not children:
        sys.exit('svn info output contains empty <' + tagname + '> element')
    return children[0].nodeValue


lastchgd = _element_text("date")
lastrev = _first_element("commit").getAttribute("revision")
lastauthor = _element_text("author")
logging.debug("repository file changed %s in revision %s by %s " % (lastchgd, lastrev, lastauthor))

# Build the .svn/entries record for the file as a list with one item per line.
# The list is written out directly: joining the fields and calling
# splitlines() on the result is not reliable, because on a unicode string
# splitlines() may treat the closing form feed itself as a line boundary and
# drop it.
entryfields = [
    filename,        # file name                     (i.e. ZOMBIE.JPG)
    'file',          # type                          (i.e. file)
    '', '', '', '',  # four empty lines, purpose unknown
    modtime,         # local last modification time  (i.e. 2006-10-01T17:07:20.000000Z)
    localmd5,        # md5                           (i.e. 1908665aa1304486ea66227fc6954d83)
    lastchgd,        # repository last changed time  (i.e. 2006-08-14T20:36:23.303009Z)
    lastrev,         # last changed revision         (i.e. 122)
    lastauthor,      # last changed author           (i.e. zingbat)
    # a 'has props' line is not written - its format is unknown
    '\x0C',          # a form feed line closes the entry
]

# Values parsed from XML are unicode while .svn/entries is read and written as
# bytes; encode them here so that a non-ASCII value cannot make the write fail
# after the file has already been truncated.
# NOTE(review): UTF-8 is assumed to be the encoding of .svn/entries - confirm.
entry = []
for field in entryfields:
    if isinstance(field, unicode):
        field = field.encode('utf-8')
    entry.append(field)

logging.debug("file entry info %s" % entry)


# Put the new record into the entry list: replace the existing record of the
# file, or append a new one when the file is not listed yet. The first entry
# is never considered (presumably it describes the directory itself).
for position in range(1, len(entries)):
    if entries[position][0] != filename:
        continue

    logging.info("file entry found at position %d", position)
    changes = list(difflib.unified_diff(entries[position], entry))
    if not changes:
        logging.info("file entry is already updated to its latest version")
        sys.exit()
    logging.debug("file entry diff %s", "\n".join(changes))
    # the record is only replaced when the diff adds the new checksum line
    if ("+" + localmd5) not in "".join(changes):
        logging.info("file checksum isn't changed - aborting to avoid errors")
        sys.exit("")

    entries[position] = entry
    break
else:
    # loop finished without a match - the file is new to this directory
    entries.append(entry)


# Write the updated entry list back to .svn/entries.
# Serialize everything before touching the file, so that a failure while
# joining the lines cannot leave a truncated .svn/entries behind.
entriesdata = "".join(["\n".join(record) + "\n" for record in entries])

logging.debug("removing r/o attribute from .svn/entries")
# remember the current mode so it can be restored afterwards
att = os.stat(".svn/entries")[0]
os.chmod(".svn/entries", stat.S_IWRITE)
try:
    try:
        entfile = open(".svn/entries", 'wb')
    except IOError:
        sys.exit('error .svn/entries could not be written')
    logging.info("writing .svn/entries")
    try:
        entfile.write(entriesdata)
    finally:
        entfile.close()
finally:
    # restore the original mode even when opening or writing failed
    os.chmod(".svn/entries", att)

# Store a copy of the imported file as .svn/text-base/<filename>.svn-base,
# replacing any copy that is already there.
logging.info("saving copy to .svn/text-base")
textbase_copy = "".join([".svn/text-base/", filename, ".svn-base"])
old_copy_present = os.path.isfile(textbase_copy)
if old_copy_present:
    logging.debug("remove old version from .svn/text-base")
    # make the old copy writable first (presumably it is read-only)
    os.chmod(textbase_copy, stat.S_IWRITE)
    os.remove(textbase_copy)
copyfile(filename, textbase_copy)

logging.info("done.")