1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 import cProfile
22 import os
23 import pstats
24 import random
25 import sys
26
27 from translate.storage import factory
28

class TranslateBenchmarker(object):
    """Helper for benchmarking Translate Toolkit store classes.

    Sample files are generated below ``<test_dir>/benchmark/zxx`` with
    :meth:`create_sample_files` and read back with :meth:`parse_file`.
    """

    def __init__(self, test_dir, storeclass):
        """Set up benchmarking in *test_dir* for the given store class.

        :param test_dir: directory that will hold the generated files; it is
            stored as an absolute path.
        :param storeclass: store class to benchmark.  The first entry of its
            ``Extensions`` attribute is used as the sample file extension.
        """
        self.test_dir = os.path.abspath(test_dir)
        self.StoreClass = storeclass
        self.extension = self.StoreClass.Extensions[0]
        self.project_dir = os.path.join(self.test_dir, "benchmark")
        self.file_dir = os.path.join(self.project_dir, "zxx")

    def clear_test_dir(self):
        """Remove the test directory and everything below it.

        Does nothing when the directory does not exist.
        """
        if os.path.exists(self.test_dir):
            # Walk bottom-up so every directory is already empty by the time
            # its parent tries to remove it.
            for dirpath, subdirs, filenames in os.walk(self.test_dir, topdown=False):
                for name in filenames:
                    os.remove(os.path.join(dirpath, name))
                for name in subdirs:
                    os.rmdir(os.path.join(dirpath, name))
        # os.walk never yields the root as a subdirectory, so remove it here.
        if os.path.exists(self.test_dir):
            os.rmdir(self.test_dir)
        assert not os.path.exists(self.test_dir)

    def create_sample_files(self, num_dirs, files_per_dir, strings_per_file,
                            source_words_per_string, target_words_per_string):
        """Create sample files for benchmarking.

        :param num_dirs: number of directories to fill.  With more than one,
            files go into ``sample_<n>`` subdirectories of the file
            directory; otherwise they go directly into the file directory.
        :param files_per_dir: number of files written to each directory.
        :param strings_per_file: number of units added to each file.
        :param source_words_per_string: words in each unit's source string.
        :param target_words_per_string: words in each unit's target string.
        """
        # makedirs creates test_dir, project_dir and file_dir in one go.
        if not os.path.exists(self.file_dir):
            os.makedirs(self.file_dir)
        for dirnum in range(num_dirs):
            if num_dirs > 1:
                dirname = os.path.join(self.file_dir, "sample_%d" % dirnum)
                if not os.path.exists(dirname):
                    os.mkdir(dirname)
            else:
                dirname = self.file_dir
            for filenum in range(files_per_dir):
                sample_file = self.StoreClass()
                for _ in range(strings_per_file):
                    source_string = " ".join(
                        "word%d" % (random.randint(0, strings_per_file) * i)
                        for i in range(source_words_per_string))
                    sample_unit = sample_file.addsourceunit(source_string)
                    sample_unit.target = " ".join(
                        "drow%d" % (random.randint(0, strings_per_file) * i)
                        for i in range(target_words_per_string))
                sample_file.savefile(
                    os.path.join(dirname, "file_%d.%s" % (filenum, self.extension)))

    def parse_file(self):
        """Parse every file below the file directory and print the unit count."""
        count = 0
        for dirpath, subdirs, filenames in os.walk(self.file_dir, topdown=False):
            for name in filenames:
                pofilename = os.path.join(dirpath, name)
                # Close each file as soon as it has been handed to the store
                # and counted, so large runs do not exhaust file descriptors.
                with open(pofilename, 'r') as inputfile:
                    parsedfile = self.StoreClass(inputfile)
                    count += len(parsedfile.units)
        print("counted %d units" % count)
85
if __name__ == "__main__":
    # The store type to benchmark may be given as the first command-line
    # argument; it defaults to "po".
    storetype = "po"
    if len(sys.argv) > 1:
        storetype = sys.argv[1]
    if storetype not in factory.classes:
        # Passing the message to sys.exit reports it on stderr and exits with
        # a non-zero status, so callers can detect the failure.
        sys.exit("StoreClass: '%s' is not a base class that the class factory can load" % storetype)
    storeclass = factory.classes[storetype]
    for sample_file_sizes in [
        # (num_dirs, files_per_dir, strings_per_file,
        #  source_words_per_string, target_words_per_string)
        (1, 1, 10000, 5, 10),
    ]:
        benchmarker = TranslateBenchmarker("BenchmarkDir", storeclass)
        benchmarker.clear_test_dir()
        # Create the files once up front so parse_file has input even though
        # create_sample_files is profiled again below.
        benchmarker.create_sample_files(*sample_file_sizes)
        # Each entry is (method name, argument source text); cProfile.run
        # evaluates the resulting call string in the __main__ namespace.
        methods = [("create_sample_files", "*sample_file_sizes"), ("parse_file", "")]
        for methodname, methodparam in methods:
            print("%s %d dirs, %d files, %d strings, %d/%d words" % ((methodname,) + sample_file_sizes))
            print("_______________________________________________________")
            statsfile = "%s_%s" % (methodname, storetype) + '_%d_%d_%d_%d_%d.stats' % sample_file_sizes
            cProfile.run('benchmarker.%s(%s)' % (methodname, methodparam), statsfile)
            stats = pstats.Stats(statsfile)
            stats.sort_stats('cumulative').print_stats(20)
            print("_______________________________________________________")
120
121