cx2csv/cx2csv.py at main · freedomcat/cx2csv · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
from lxml import etree
import argparse
import os
import csv
import subprocess


def pathConvert(file):
    """Return *file* as a path usable on the current platform.

    A path that contains a backslash while running on a POSIX host is
    handed to the external ``wslpath`` command and the command's stripped
    standard output is returned. Every other path is returned unchanged.

    If ``wslpath`` cannot be started or exits with a non-zero status, the
    failure is printed and the original *file* is returned, so callers
    always receive a string rather than ``None``.
    """
    if '\\' not in file or os.name != "posix":
        return file
    try:
        ret = subprocess.run(["wslpath", file], check=True,
                             capture_output=True, text=True)
    except OSError as e:
        # The wslpath executable could not be launched (e.g. it is not
        # installed); fall back to the path exactly as it was given.
        print(e)
        return file
    except subprocess.CalledProcessError as e:
        # wslpath ran but rejected the path; report why and fall back.
        print(e.cmd)
        print(e.returncode)
        print(e.stdout)
        print(e.stderr)
        return file
    return ret.stdout.strip()


def getTarget(replaceData, id):
    """Look up the replacement text for one translation unit.

    Scans *replaceData* (an iterable of mappings with ``'id'`` and
    ``'target'`` keys) in order and returns the ``'target'`` value of the
    first entry whose ``'id'`` equals *id*. Returns ``None`` when no entry
    matches.
    """
    matches = (entry['target'] for entry in replaceData
               if entry['id'] == id)
    return next(matches, None)


# Command-line interface: one positional XLIFF path plus an optional
# -r/--replace switch. The help strings are built with implicit string
# concatenation so no source indentation leaks into the text.
parser = argparse.ArgumentParser(
        description=(
            "Crowdin EXPORT/IMPORT XLIFF PROCESSER. "
            "This script does the following: "
            "Output the exported xliff from crowdin to csv format. "
            "If you modify the target with csv, when "
            "you specify csv for the replace option, apply it to "
            "xliff and generate an xliff for import into "
            "crowdin, written next to exportXliff with '-import' "
            "added before the file extension."))

parser.add_argument('exportXliff', help=(
        'Specifies the xliff file exported from crowdin.'))
# nargs='?' with const='NOFILE': a bare "-r" stores the sentinel 'NOFILE',
# "-r FILE" stores FILE, and leaving -r out stores None (no replacement).
parser.add_argument('-r', '--replace',
                    nargs='?', action='store', const='NOFILE',
                    help=(
                        'Specify csv file with id and target to replace. '
                        'If the file name is omitted (a bare -r), '
                        'a file with the extension csv '
                        'that exists in the same path as the file '
                        'specified by exportXliff is read.'))

args = parser.parse_args()

# Path of the exported XLIFF, converted once for the current platform.
infile = pathConvert(args.exportXliff)

# Rows of the replacement CSV (dicts keyed by the CSV header); stays empty
# when -r/--replace was not given.
replaceData = []
if args.replace:
    if args.replace == "NOFILE":
        # Bare -r: use the CSV that sits next to the XLIFF. Only the final
        # extension is swapped, so dots or ".xliff" elsewhere in the path
        # are left alone and a non-.xliff input still maps to a .csv file.
        repfile = os.path.splitext(infile)[0] + ".csv"
    else:
        repfile = pathConvert(args.replace)
    # utf-8-sig drops a leading byte-order mark if the file has one.
    with open(repfile, mode='r', encoding="utf-8-sig") as f:
        csvfile = csv.DictReader(f)
        replaceData = list(csvfile)

# Parse the exported XLIFF and visit its elements in document order,
# collecting one CSV row per <target> that carries a state attribute.
tree = etree.parse(infile)
root = tree.getroot()
mynsmap = dict()
mynsmap['x'] = root.nsmap[None]
files = tree.xpath("//x:*/*", namespaces=mynsmap)
fileid = ""
# Values remembered from the trans-unit currently being read. tuid stays
# None while no usable trans-unit is open, so that child elements of a
# skipped unit are never paired with data from an earlier unit.
tuid = None
resname = None
source = None
outcsv = []
outcsv.append(["url", "id", "state", "context",  "source", "target"])
for f in files:
    # Drop the "{namespace}" prefix; rpartition also handles a tag that
    # has no prefix at all.
    tag = f.tag.rpartition("}")[2]
    if tag == "file":
        fileid = ("/" + f.attrib['id'] + "/" + f.attrib['source-language'] +
                  "-" + f.attrib['target-language'] + "#")
        continue
    if tag == "trans-unit":
        # Start a fresh unit. Units without a resname are skipped, along
        # with the <source>/<target> elements that follow them.
        source = None
        resname = f.attrib.get('resname')
        tuid = f.attrib['id'] if resname is not None else None
        continue
    if tuid is None:
        continue
    if tag == "source":
        source = f.text
        continue
    if tag == "target":
        try:
            state = f.attrib['state']
        except KeyError:
            continue
        if state == "needs-translation":
            target = ""
        else:
            target = f.text
        if args.replace:
            # NOTE(review): every CSV row with a matching id is applied and
            # marked "translated", including rows whose target is empty or
            # unchanged - confirm this is intended.
            new_text = getTarget(replaceData, tuid)
            if new_text is not None:
                f.text = new_text
                f.attrib['state'] = "translated"
        # The row records the state and target as they were before any
        # replacement was applied.
        outcsv.append([fileid+tuid, tuid, state, resname, source, target])

# Derive both output names from the input path by splitting off only its
# final extension, so dots elsewhere in the path (e.g. "./dir/a.b.xliff")
# are untouched and an input without ".xliff" is never overwritten.
outbase, outext = os.path.splitext(infile)

# CSV export: <input stem>.csv. newline='' lets the csv module control the
# line endings itself, as its documentation requires for writers.
outfile = outbase + '.csv'
with open(outfile, 'w', encoding='UTF-8', newline='') as f:
    writer = csv.writer(f, quoting=csv.QUOTE_ALL)
    writer.writerows(outcsv)

if args.replace:
    # XLIFF for re-import: <input stem>-import<original extension>.
    outfile = outbase + '-import' + outext
    tree.write(outfile, encoding='UTF-8')