#!/usr/bin/python3

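# Creates a vertical (written to stdout) in which every value of an existing
# structure attribute is paired with a new attribute value taken from a
# tab-separated mapping file.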

import sys
import manatee

# When True, the mapping file is loaded and a structure is printed for every
# source value; the later parts of this script test this flag.
use_hash = True

# Fail with a readable usage message instead of a bare IndexError traceback
# when positional arguments are missing. Extra arguments are still ignored.
if len(sys.argv) < 6:
    sys.exit('usage: %s CORPNAME STRUCT SRC_ATTR NEW_NAME MAP_FILENAME'
             % sys.argv[0])

corpname = sys.argv[1]      # corpus handed to manatee.Corpus
struct = sys.argv[2]        # structure name, also used as the output tag name
src_attr = sys.argv[3]      # existing structure attribute whose values are mapped
new_name = sys.argv[4]      # name of the new attribute written to the output
map_filename = sys.argv[5]  # tab-separated file: source value -> new value

# `corp` and `manatee_struct` stay bound at module level; presumably the
# attribute object relies on its owners staying alive -- TODO confirm.
corp = manatee.Corpus(corpname)
manatee_struct = corp.get_struct(struct)
manatee_attr = manatee_struct.get_attr(src_attr)

# Load the mapping file into `hash`: one "source value<TAB>new value" pair per
# line. Progress is reported on stderr every 100000 lines.
sys.stderr.write('reading hash %s ...\n' % map_filename)
if use_hash:
    # NOTE: `hash` shadows the builtin of the same name; the name is kept
    # because the code that writes the vertical looks the mapping up under it.
    hash = {}
    # The context manager guarantees the file is closed, including when a
    # malformed line aborts the loop with ValueError.
    with open(map_filename) as map_file:
        for index, line in enumerate(map_file):
            if index % 100000 == 0:
                sys.stderr.write('%d hash lines read...\n' % index)
            fields = line.strip().split('\t')
            if len(fields) == 2:
                key, value = fields
            elif line.startswith('\t'):
                # Empty source value: strip() removed the leading tab, so the
                # whole stripped line is taken as the new value.
                key, value = '', line.strip()
            else:
                # Anything else is rejected. This includes a line with an
                # empty new value, because strip() drops its trailing tab.
                raise ValueError('invalid format of mapping file: line %d' % (index + 1))
            hash[key] = value

# Emit the vertical on stdout: for every index below manatee_attr.size()
# (presumably one per structure instance -- TODO confirm) print an opening tag
# carrying the new and the source attribute, a placeholder token line "x",
# and the closing tag.
sys.stderr.write('writing vertical ...\n')
for position in range(manatee_attr.size()):
    original_value = manatee_attr.pos2str(position)
    if not use_hash:
        # Nothing is printed when the mapping is disabled.
        continue
    # Source values missing from the mapping get an empty new value.
    mapped_value = hash.get(original_value, '')
    opening_tag = '<%s %s="%s" %s="%s">' % (
        struct, new_name, mapped_value, src_attr, original_value)
    closing_tag = '</%s>' % struct
    print('\n'.join((opening_tag, 'x', closing_tag)))
