Coverage for IdentifySite.py: 0%
274 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-09-28 16:41 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-09-28 16:41 +0000
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#Copyright 2010-2011 Chaz Littlejohn
#This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU Affero General Public License as published by
#the Free Software Foundation, version 3 of the License.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU Affero General Public License
#along with this program. If not, see <http://www.gnu.org/licenses/>.
#In the "official" distribution you can find the license in agpl-3.0.txt.
18from __future__ import print_function
21#import L10n
22#_ = L10n.get_translation()
24import re
25import sys
26import os
27from time import time
28from optparse import OptionParser
29import codecs
31import Database
33import Configuration
34import logging
# xlrd is an optional dependency. When it is not installed the name is bound
# to None so the rest of the module can test for it with a truth check.
# Only ImportError is swallowed: any other failure is a real error and
# should not be hidden.
try:
    import xlrd
except ImportError:
    xlrd = None

# logging has been set up in fpdb.py or HUD_main.py, use their settings:
log = logging.getLogger("parser")
# Pattern tables keyed by filter name (the converter name without "ToFpdb").
# Only the sites listed here have an entry; look up with .get() when the
# filter name is not known to be present.

# Searched against the whole file text to set the "archiveDivider" flag.
re_Divider = {
    'PokerStars': re.compile(r'^Hand #(\d+)\s*$', re.MULTILINE),
    'Fulltilt': re.compile(r'\*{20}\s#\s\d+\s\*{15,25}\s?', re.MULTILINE),
}

# Matched at the very start of the file to set the "archiveHead" flag.
re_Head = {
    'Fulltilt': re.compile(r'^((BEGIN)?\n)?FullTiltPoker.+\n\nSeat', re.MULTILINE),
}

# Searched against the header text of .xls/.xlsx tournament summaries.
re_XLS = {
    'PokerStars': re.compile(r'Tournaments\splayed\sby\s\'.+?\''),
    'Fulltilt': re.compile(r'Player\sTournament\sReport\sfor\s.+?\s\(.*\)'),
}
class FPDBFile(object):
    """Record describing one scanned file and what it was identified as.

    The class attributes below are the defaults for a file that has not been
    identified yet; instances overwrite them as information becomes known.
    """
    path = ""
    ftype = None  # Valid: hh, summary, both
    site = None  # the Site the file was matched to, once identified
    kodec = None  # name of the codec the file content was decoded with
    archive = False
    archiveHead = False
    archiveDivider = False
    gametype = False
    hero = '-'

    def __init__(self, path):
        """Create an unidentified record for the file at *path*."""
        self.path = path

    def __repr__(self):
        # Keeps log output and debugger listings of file records readable.
        return "%s(path=%r, ftype=%r)" % (type(self).__name__, self.path, self.ftype)
class Site(object):
    """Identification data for one site, copied from its converter class.

    Parameters:
        name        -- site name used as the key in the configuration
        hhc_fname   -- module name of the hand-history converter
        filter_name -- converter class name (hhc_fname without "ToFpdb")
        summary     -- module/class name of the summary importer, or a falsy
                       value when the site has none
        obj         -- converter class; must provide re_SplitHands, codepage,
                       copyGameHeader, summaryInFile and re_Identify

    Raises AttributeError when obj lacks a required attribute or when the
    summary module does not define a class named after itself; ImportError
    when the summary module cannot be imported.
    """

    def __init__(self, name, hhc_fname, filter_name, summary, obj):
        self.name = name
        # FIXME: rename filter to hhc_fname
        self.hhc_fname = hhc_fname
        # FIXME: rename filter_name to hhc_type
        self.filter_name = filter_name
        self.re_SplitHands = obj.re_SplitHands
        self.codepage = obj.codepage
        self.copyGameHeader = obj.copyGameHeader
        self.summaryInFile = obj.summaryInFile
        self.re_Identify = obj.re_Identify
        # Defined unconditionally so the attribute exists on every Site, even
        # when there is no summary importer.
        self.re_SumIdentify = None
        if summary:
            self.summary = summary
            # The summary importer class carries the same name as its module.
            self.re_SumIdentify = getattr(__import__(summary), summary, None).re_Identify
        else:
            self.summary = None
        self.line_delimiter = self.getDelimiter(filter_name)
        self.line_addendum = self.getAddendum(filter_name)
        self.spaces = filter_name == 'Entraction'
        self.getHeroRegex(obj, filter_name)

    def getDelimiter(self, filter_name):
        """Return the blank-line run separating hands, or None if unknown."""
        if filter_name == 'PokerStars':
            return '\n\n'
        if filter_name in ('Fulltilt', 'PokerTracker'):
            return '\n\n\n'
        # Other sites: probe the converter's split pattern, shortest run
        # first. Entraction is excluded from the two-newline form.
        if self.re_SplitHands.match('\n\n') and filter_name != 'Entraction':
            return '\n\n'
        if self.re_SplitHands.match('\n\n\n'):
            return '\n\n\n'
        return None

    def getAddendum(self, filter_name):
        """Return the site-specific addendum string ('' for most sites)."""
        addenda = {
            'OnGame': '*',
            'Merge': '<',
            'Entraction': '\n\n',
        }
        return addenda.get(filter_name, '')

    def getHeroRegex(self, obj, filter_name):
        """Copy the hero-card patterns from the converter class, if any.

        re_HeroCards stays None for Bovada and Enet even when the converter
        defines it. re_HeroCards1/re_HeroCards2 are only taken from the
        PokerTracker converter and are None for every other site.
        """
        self.re_HeroCards = None
        self.re_HeroCards1 = None
        self.re_HeroCards2 = None
        if hasattr(obj, 're_HeroCards'):
            if filter_name not in ('Bovada', 'Enet'):
                self.re_HeroCards = obj.re_HeroCards
            if filter_name == 'PokerTracker':
                self.re_HeroCards1 = obj.re_HeroCards1
                self.re_HeroCards2 = obj.re_HeroCards2
class IdentifySite(object):
    """Identify which site (and converter) a file or directory tree belongs to.

    sitelist maps a converter's siteId to its Site; filelist maps each
    processed path to the FPDBFile describing what it was identified as.
    """

    def __init__(self, config, hhcs = None):
        """Build the site list from *hhcs*, or from config.hhcs when omitted."""
        self.config = config
        # Codecs tried, in this order, when decoding a text file.
        self.codepage = ("utf8", "utf-16", "cp1252", "ISO-8859-1")
        self.sitelist = {}
        self.filelist = {}
        self.generateSiteList(hhcs)

    def scan(self, path):
        """Process a single file, or every file below a directory."""
        if os.path.isdir(path):
            self.walkDirectory(path, self.sitelist)
        else:
            self.processFile(path)

    def get_fobj(self, file):
        """Return the FPDBFile recorded for *file*, or False if there is none."""
        return self.filelist.get(file, False)

    def get_filelist(self):
        """Return the dict of identified files (path -> FPDBFile)."""
        return self.filelist

    def clear_filelist(self):
        """Forget every file identified so far."""
        self.filelist = {}

    def generateSiteList(self, hhcs):
        """Generates a dictionary of Site objects, keyed by siteId, for each site in hhcs"""
        if not hhcs:
            hhcs = self.config.hhcs
        for site, hhc in hhcs.items():
            converter = hhc.converter
            filter_name = converter.replace("ToFpdb", "")
            summary = hhc.summaryImporter
            try:
                # The import sits inside the try so that one missing or
                # broken converter is logged and skipped instead of aborting
                # the whole site list.
                mod = __import__(converter)
                obj = getattr(mod, filter_name, None)
                self.sitelist[obj.siteId] = Site(site, converter, filter_name, summary, obj)
            except Exception as e:
                log.error("Failed to load HH importer: %s. %s" % (filter_name, e))
        # PokerTracker patterns are kept separately; idSite uses them as a
        # last resort after every configured site has been tried.
        self.re_Identify_PT = getattr(__import__("PokerTrackerToFpdb"), "PokerTracker", None).re_Identify
        self.re_SumIdentify_PT = getattr(__import__("PokerTrackerSummary"), "PokerTrackerSummary", None).re_Identify

    def walkDirectory(self, dir, sitelist):
        """Walks a directory tree and calls processFile on each file.

        *sitelist* is only passed through to the recursive calls.
        """
        dir = os.path.abspath(dir)
        # os.listdir never returns "." or "..", so no filtering is needed.
        for entry in os.listdir(dir):
            nfile = os.path.join(dir, entry)
            if os.path.isdir(nfile):
                self.walkDirectory(nfile, sitelist)
            else:
                self.processFile(nfile)

    def __listof(self, x):
        """Return *x* unchanged if it is a list or tuple, else wrap it in a list."""
        if isinstance(x, (list, tuple)):
            return x
        return [x]

    def processFile(self, path):
        """Read and identify *path* unless it is already in filelist.

        Files that cannot be read, are empty, or match no site are not
        recorded.
        """
        if path in self.filelist:
            return
        whole_file, kodec = self.read_file(path)
        log.debug("Identifying %s (codec: %s)", path, kodec)
        if not whole_file:
            return
        fobj = self.idSite(path, whole_file, kodec)
        if fobj is False:  # Site id failed
            log.debug(("DEBUG:") + " " + ("siteId Failed for: %s") % path)
        else:
            self.filelist[path] = fobj

    def read_file(self, in_path):
        """Return (text, codec_name) for *in_path*, or (None, None) on failure.

        For .xls/.xlsx files the text is the value of the first cell of the
        first sheet and the codec is reported as 'utf-8'. This requires xlrd;
        without it both extensions yield (None, None). Any other file is
        decoded with the first codec in self.codepage that succeeds.
        """
        if in_path.endswith(('.xls', '.xlsx')):
            if not xlrd:
                return None, None
            try:
                wb = xlrd.open_workbook(in_path)
                sh = wb.sheet_by_index(0)
                header = str(sh.cell(0,0).value)
                return header, 'utf-8'
            except Exception:
                # Unreadable or unexpected workbook: treat as unidentifiable.
                return None, None
        for kodec in self.codepage:
            try:
                # The context manager closes the file even when read() fails
                # part-way through decoding.
                with codecs.open(in_path, 'r', kodec) as infile:
                    whole_file = infile.read()
                return whole_file, kodec
            except (IOError, OSError, UnicodeError, LookupError):
                continue
        return None, None

    def idSite(self, path, whole_file, kodec):
        """Identifies the site the hh file originated from.

        Returns an FPDBFile with site and ftype ("hh" or "summary") filled
        in, or False when no configured site and no PokerTracker pattern
        matches.
        """
        f = FPDBFile(path)
        f.kodec = kodec
        # The hand-history patterns only look at the start of the file.
        head = whole_file[:5000]

        # Pass 1: hand histories of the configured sites.
        for site in self.sitelist.values():
            filter_name = site.filter_name
            m = site.re_Identify.search(head)
            if not m:
                continue
            if filter_name in ('Fulltilt', 'PokerStars'):
                # Flag archive-style files: divider lines anywhere in the
                # file, or an archive header at its start.
                if re_Divider[filter_name].search(whole_file.replace('\r\n', '\n')):
                    f.archive = True
                    f.archiveDivider = True
                elif re_Head.get(filter_name) and re_Head[filter_name].match(head.replace('\r\n', '\n')):
                    f.archive = True
                    f.archiveHead = True
            f.site = site
            f.ftype = "hh"
            if f.site.re_HeroCards:
                h = f.site.re_HeroCards.search(head)
                if h and 'PNAME' in h.groupdict():
                    f.hero = h.group('PNAME')
            else:
                # Sites without a hero-card pattern get the fixed name 'Hero'.
                f.hero = 'Hero'
            return f

        # Pass 2: tournament summaries of the configured sites.
        is_excel = path.endswith(('.xls', '.xlsx'))
        for site in self.sitelist.values():
            if not site.summary:
                continue
            if is_excel:
                filter_name = site.filter_name
                if filter_name in ('Fulltilt', 'PokerStars'):
                    if re_XLS[filter_name].search(head):
                        f.site = site
                        f.ftype = "summary"
                        return f
            elif site.re_SumIdentify.search(whole_file[:10000]):
                f.site = site
                f.ftype = "summary"
                return f

        # Pass 3: PokerTracker exports (hand history or summary).
        m1 = self.re_Identify_PT.search(head)
        m2 = self.re_SumIdentify_PT.search(whole_file[:100])
        if m1 or m2:
            converter = 'PokerTrackerToFpdb'
            filter_name = 'PokerTracker'
            mod = __import__(converter)
            obj = getattr(mod, filter_name, None)
            summary = 'PokerTrackerSummary'
            f.site = Site('PokerTracker', converter, filter_name, summary, obj)
            if m1:
                f.ftype = "hh"
                # Choose the hand-split pattern from the matched header
                # variant; these variants have no fixed line delimiter.
                if re.search(r'\*{2}\sGame\sID\s', m1.group()):
                    f.site.line_delimiter = None
                    f.site.re_SplitHands = re.compile(r'End\sof\sgame\s\d+')
                elif re.search(r'\*{2}\sHand\s\#\s', m1.group()):
                    f.site.line_delimiter = None
                    f.site.re_SplitHands = re.compile(r'Rake:\s[^\s]+')
                elif re.search(r'Server\spoker\d+\.ipoker\.com', whole_file[:250]):
                    f.site.line_delimiter = None
                    f.site.spaces = True
                    f.site.re_SplitHands = re.compile(r'GAME\s\#')
                m3 = f.site.re_HeroCards1.search(head)
                if m3:
                    f.hero = m3.group('PNAME')
                else:
                    m4 = f.site.re_HeroCards2.search(head)
                    if m4:
                        f.hero = m4.group('PNAME')
            else:
                f.ftype = "summary"
            return f

        return False

    def getFilesForSite(self, sitename, ftype):
        """Return the identified files of type *ftype* belonging to *sitename*."""
        # The ftype check comes first so that f.site is never dereferenced on
        # an entry whose type is unset.
        return [f for f in self.filelist.values()
                if f.ftype is not None and f.ftype == ftype and f.site.name == sitename]

    def fetchGameTypes(self):
        """Determine and store the gametype of every identified "hh" file."""
        for name, f in list(self.filelist.items()):
            if f.ftype != "hh":
                continue
            # Only byte paths need decoding; text paths are passed on as-is.
            if isinstance(name, bytes):
                name = name.decode("utf8", "replace")
            mod = __import__(f.site.hhc_fname)
            obj = getattr(mod, f.site.filter_name, None)
            hhc = obj(self.config, in_path = name, sitename = f.site.hhc_fname, autostart = False)
            if hhc.readFile():
                f.gametype = hhc.determineGameType(hhc.whole_file)
def main(argv=None):
    """Identify every file under ./regression-test-files and print a report.

    Loads HUD_config.test.xml, scans the directory, then prints the known
    sites, the type and converter of each identified file, and the
    PokerStars hand-history files. *argv* defaults to sys.argv[1:] but is
    not otherwise used.
    """
    if argv is None:
        argv = sys.argv[1:]

    Configuration.set_logfile("fpdb-log.txt")
    config = Configuration.Config(file = "HUD_config.test.xml")
    in_path = os.path.abspath('regression-test-files')
    IdSite = IdentifySite(config)
    start = time()
    IdSite.scan(in_path)
    print('duration', time() - start)

    print("\n----------- SITE LIST -----------")
    for sid, site in IdSite.sitelist.items():
        print("%2d: Name: %s HHC: %s Summary: %s" %(sid, site.name, site.filter_name, site.summary))
    print("----------- END SITE LIST -----------")

    print("\n----------- ID REGRESSION FILES -----------")
    count = 0
    for f, ffile in IdSite.filelist.items():
        tmp = ": Type: %s " % ffile.ftype
        count += 1
        if ffile.ftype == "hh":
            tmp += "Conv: %s" % ffile.site.hhc_fname
        elif ffile.ftype == "summary":
            tmp += "Conv: %s" % ffile.site.summary
        print(f, tmp)
    print(count, 'files identified')
    print("----------- END ID REGRESSION FILES -----------")

    print("----------- RETRIEVE FOR SINGLE SITE -----------")
    # Print what was retrieved; the lookup result used to be thrown away,
    # leaving this section of the report empty.
    for ffile in IdSite.getFilesForSite("PokerStars", "hh"):
        print(ffile.path)
    print("----------- END RETRIEVE FOR SINGLE SITE -----------")


if __name__ == '__main__':
    sys.exit(main())