Coverage for IdentifySite.py: 0%

274 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-09-27 18:50 +0000

1#!/usr/bin/env python 

2# -*- coding: utf-8 -*- 

3 

4#Copyright 2010-2011 Chaz Littlejohn 

5#This program is free software: you can redistribute it and/or modify 

6#it under the terms of the GNU Affero General Public License as published by 

7#the Free Software Foundation, version 3 of the License. 

8# 

9#This program is distributed in the hope that it will be useful, 

10#but WITHOUT ANY WARRANTY; without even the implied warranty of 

11#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12#GNU General Public License for more details. 

13# 

14#You should have received a copy of the GNU Affero General Public License 

15#along with this program. If not, see <http://www.gnu.org/licenses/>. 

16#In the "official" distribution you can find the license in agpl-3.0.txt. 

17 

18from __future__ import print_function 

19 

20 

21#import L10n 

22#_ = L10n.get_translation() 

23 

24import re 

25import sys 

26import os 

27from time import time 

28from optparse import OptionParser 

29import codecs 

30 

31import Database 

32 

33import Configuration 

34import logging 

35try: 

36 import xlrd 

37except: 

38 xlrd = None 

39# logging has been set up in fpdb.py or HUD_main.py, use their settings: 

40log = logging.getLogger("parser") 

41 

# Module-level pattern tables, keyed by converter name ('PokerStars', 'Fulltilt').
# re_Divider and re_Head are searched by IdentifySite.idSite to flag archive
# files; re_XLS is searched against the header read from .xls/.xlsx files.
re_Divider = {
    'PokerStars': re.compile(r'^Hand #(\d+)\s*$', re.MULTILINE),
    'Fulltilt': re.compile(r'\*{20}\s#\s\d+\s\*{15,25}\s?', re.MULTILINE),
}
re_Head = {
    'Fulltilt': re.compile(r'^((BEGIN)?\n)?FullTiltPoker.+\n\nSeat', re.MULTILINE),
}
re_XLS = {
    'PokerStars': re.compile(r'Tournaments\splayed\sby\s\'.+?\''),
    'Fulltilt': re.compile(r'Player\sTournament\sReport\sfor\s.+?\s\(.*\)'),
}

48 

class FPDBFile(object):
    """Record describing one scanned file and what was identified about it.

    The class attributes below are the defaults every instance starts with;
    IdentifySite.idSite and IdentifySite.fetchGameTypes overwrite them on the
    instance as information is discovered.
    """

    # Location of the file on disk (replaced per instance in __init__).
    path = ""
    # Valid ftype values: hh, summary, both. site and kodec hold the matched
    # Site object and the codec the file was read with.
    ftype = site = kodec = None
    # Archive flags, and the game type filled in later by fetchGameTypes.
    archive = archiveHead = archiveDivider = gametype = False
    # Placeholder hero name used until one is extracted from the file.
    hero = '-'

    def __init__(self, path):
        """Remember *path*; all other fields keep their class-level defaults."""
        self.path = path

62 

class Site(object):
    """Per-site settings copied from a hand-history converter class.

    Args:
        name: site name as given in the configuration.
        hhc_fname: name of the converter module (e.g. ending in "ToFpdb").
        filter_name: name of the converter class inside that module.
        summary: name of the summary importer module/class, or a falsy value
            when the site has none.
        obj: the converter class; its re_SplitHands, codepage, copyGameHeader,
            summaryInFile and re_Identify attributes are copied onto the Site.
    """

    def __init__(self, name, hhc_fname, filter_name, summary, obj):
        self.name = name
        # FIXME: rename filter to hhc_fname
        self.hhc_fname = hhc_fname
        # FIXME: rename filter_name to hhc_type
        self.filter_name = filter_name
        self.re_SplitHands = obj.re_SplitHands
        self.codepage = obj.codepage
        self.copyGameHeader = obj.copyGameHeader
        self.summaryInFile = obj.summaryInFile
        self.re_Identify = obj.re_Identify
        # Always define the summary attributes so every instance has the same
        # shape, whether or not the site has a summary importer.
        self.summary = None
        self.re_SumIdentify = None
        if summary:
            self.summary = summary
            # The summary importer class is expected to share its module's name.
            self.re_SumIdentify = getattr(__import__(summary), summary, None).re_Identify
        self.line_delimiter = self.getDelimiter(filter_name)
        self.line_addendum = self.getAddendum(filter_name)
        self.spaces = filter_name == 'Entraction'
        self.getHeroRegex(obj, filter_name)

    def getDelimiter(self, filter_name):
        """Return the blank-line delimiter for this site, or None if unknown.

        Fixed values are used for PokerStars, Fulltilt and PokerTracker; for
        other converters the delimiter is probed with self.re_SplitHands.
        """
        if filter_name == 'PokerStars':
            return '\n\n'
        if filter_name in ('Fulltilt', 'PokerTracker'):
            return '\n\n\n'
        if self.re_SplitHands.match('\n\n') and filter_name != 'Entraction':
            return '\n\n'
        if self.re_SplitHands.match('\n\n\n'):
            return '\n\n\n'
        return None

    def getAddendum(self, filter_name):
        """Return the addendum string for this converter ('' for most sites)."""
        addenda = {'OnGame': '*', 'Merge': '<', 'Entraction': '\n\n'}
        return addenda.get(filter_name, '')

    def getHeroRegex(self, obj, filter_name):
        """Copy the hero-card regexes from the converter class, when present.

        re_HeroCards stays None for Bovada and Enet and for converters without
        a re_HeroCards attribute. re_HeroCards1/re_HeroCards2 are copied only
        for PokerTracker and are None otherwise.
        """
        self.re_HeroCards = None
        self.re_HeroCards1 = None
        self.re_HeroCards2 = None
        if not hasattr(obj, 're_HeroCards'):
            return
        if filter_name not in ('Bovada', 'Enet'):
            self.re_HeroCards = obj.re_HeroCards
        if filter_name == 'PokerTracker':
            self.re_HeroCards1 = obj.re_HeroCards1
            self.re_HeroCards2 = obj.re_HeroCards2

119 

class IdentifySite(object):
    """Work out which site and file type each scanned file belongs to.

    Files are read, matched against the identification regexes of every
    configured converter, and recorded in ``filelist`` (path -> FPDBFile).
    ``sitelist`` maps each converter's siteId to its Site object.
    """

    def __init__(self, config, hhcs = None):
        """Build the site list from *hhcs*, or from ``config.hhcs`` if omitted."""
        self.config = config
        # Encodings tried, in this order, when reading a text file.
        self.codepage = ("utf8", "utf-16", "cp1252", "ISO-8859-1")
        self.sitelist = {}
        self.filelist = {}
        self.generateSiteList(hhcs)

    def scan(self, path):
        """Identify *path*: recurse into it if it is a directory, else process it."""
        if os.path.isdir(path):
            self.walkDirectory(path, self.sitelist)
        else:
            self.processFile(path)

    def get_fobj(self, file):
        """Return the FPDBFile recorded for *file*, or False if there is none."""
        return self.filelist.get(file, False)

    def get_filelist(self):
        """Return the path -> FPDBFile mapping of identified files."""
        return self.filelist

    def clear_filelist(self):
        """Forget every identified file."""
        self.filelist = {}

    def generateSiteList(self, hhcs):
        """Populate ``sitelist`` with a Site for each converter in *hhcs*.

        Falls back to ``self.config.hhcs`` when *hhcs* is falsy. A converter
        that cannot be imported or wrapped is logged and skipped so the
        remaining converters still load.
        """
        if not hhcs:
            hhcs = self.config.hhcs
        for site, hhc in hhcs.items():
            converter = hhc.converter
            filter_name = converter.replace("ToFpdb", "")
            summary = hhc.summaryImporter
            try:
                mod = __import__(converter)
                obj = getattr(mod, filter_name, None)
                self.sitelist[obj.siteId] = Site(site, converter, filter_name, summary, obj)
            except Exception as e:
                log.error("Failed to load HH importer: %s. %s" % (filter_name, e))
        # PokerTracker exports are recognised separately, after all sites fail.
        self.re_Identify_PT = getattr(__import__("PokerTrackerToFpdb"), "PokerTracker", None).re_Identify
        self.re_SumIdentify_PT = getattr(__import__("PokerTrackerSummary"), "PokerTrackerSummary", None).re_Identify

    def walkDirectory(self, dir, sitelist):
        """Recursively process every file below directory *dir*.

        *sitelist* is only passed along to the recursive calls.
        """
        dir = os.path.abspath(dir)
        # os.listdir never returns "." or "..", so no filtering is needed.
        for entry in os.listdir(dir):
            nfile = os.path.join(dir, entry)
            if os.path.isdir(nfile):
                self.walkDirectory(nfile, sitelist)
            else:
                self.processFile(nfile)

    def __listof(self, x):
        """Return *x* unchanged if it is a list or tuple, else wrap it in a list."""
        if isinstance(x, (list, tuple)):
            return x
        return [x]

    def processFile(self, path):
        """Read and identify *path*, recording it in ``filelist`` on success.

        Paths already present in ``filelist`` are not processed again.
        """
        log.debug("process file identify: %s", path)
        if path in self.filelist:
            return
        whole_file, kodec = self.read_file(path)
        log.debug("kodec: %s", kodec)
        if not whole_file:
            return
        fobj = self.idSite(path, whole_file, kodec)
        if fobj is False:  # Site id failed
            log.debug("DEBUG: siteId Failed for: %s", path)
        else:
            self.filelist[path] = fobj

    def read_file(self, in_path):
        """Return ``(text, codec)`` for *in_path*, or ``(None, None)`` on failure.

        For .xls/.xlsx files only the first cell of the first sheet is
        returned, with codec 'utf-8'; these need the optional xlrd module and
        are skipped when it is unavailable. Other files are decoded with the
        first codec in ``self.codepage`` that succeeds.
        """
        if in_path.endswith(('.xls', '.xlsx')):
            if not xlrd:
                log.debug("xlrd is not available, skipping workbook: %s", in_path)
                return None, None
            try:
                wb = xlrd.open_workbook(in_path)
                sh = wb.sheet_by_index(0)
                header = str(sh.cell(0, 0).value)
                return header, 'utf-8'
            except Exception as e:
                # Best effort: an unreadable workbook is simply not identified.
                log.debug("Could not read workbook %s: %s", in_path, e)
                return None, None
        for kodec in self.codepage:
            try:
                # The context manager closes the file even if decoding fails.
                with codecs.open(in_path, 'r', kodec) as infile:
                    return infile.read(), kodec
            except (IOError, OSError, UnicodeError):
                continue
        return None, None

    def idSite(self, path, whole_file, kodec):
        """Identify the site and file type of the file at *path*.

        Tries, in order: each site's hand-history regex, each site's summary
        regex, and finally the PokerTracker regexes. Returns a populated
        FPDBFile, or False when nothing matches.
        """
        f = FPDBFile(path)
        f.kodec = kodec
        head = whole_file[:5000]

        for site in self.sitelist.values():
            filter_name = site.filter_name
            m = site.re_Identify.search(head)
            if not m:
                continue
            if filter_name in ('Fulltilt', 'PokerStars'):
                # Archive exports are detected by their divider or head layout.
                head_re = re_Head.get(filter_name)
                if re_Divider[filter_name].search(whole_file.replace('\r\n', '\n')):
                    f.archive = True
                    f.archiveDivider = True
                elif head_re and head_re.match(head.replace('\r\n', '\n')):
                    f.archive = True
                    f.archiveHead = True
            f.site = site
            f.ftype = "hh"
            if f.site.re_HeroCards:
                h = f.site.re_HeroCards.search(head)
                if h and 'PNAME' in h.groupdict():
                    f.hero = h.group('PNAME')
            else:
                # NOTE(review): this else was reattached to the re_HeroCards
                # check after the source lost its indentation - confirm that
                # 'Hero' is meant only for sites without a hero-cards regex.
                f.hero = 'Hero'
            return f

        is_excel = path.endswith(('.xls', '.xlsx'))
        for site in self.sitelist.values():
            if not site.summary:
                continue
            if is_excel:
                filter_name = site.filter_name
                if filter_name in ('Fulltilt', 'PokerStars'):
                    if re_XLS[filter_name].search(head):
                        f.site = site
                        f.ftype = "summary"
                        return f
            elif site.re_SumIdentify.search(whole_file[:10000]):
                f.site = site
                f.ftype = "summary"
                return f

        m1 = self.re_Identify_PT.search(head)
        m2 = self.re_SumIdentify_PT.search(whole_file[:100])
        if not (m1 or m2):
            return False

        converter = 'PokerTrackerToFpdb'
        filter_name = 'PokerTracker'
        mod = __import__(converter)
        obj = getattr(mod, filter_name, None)
        summary = 'PokerTrackerSummary'
        f.site = Site('PokerTracker', converter, filter_name, summary, obj)
        if not m1:
            f.ftype = "summary"
            return f

        f.ftype = "hh"
        # Choose the hand splitter from the layout of the matched header.
        if re.search(r'\*{2}\sGame\sID\s', m1.group()):
            f.site.line_delimiter = None
            f.site.re_SplitHands = re.compile(r'End\sof\sgame\s\d+')
        elif re.search(r'\*{2}\sHand\s\#\s', m1.group()):
            f.site.line_delimiter = None
            f.site.re_SplitHands = re.compile(r'Rake:\s[^\s]+')
        elif re.search(r'Server\spoker\d+\.ipoker\.com', whole_file[:250]):
            f.site.line_delimiter = None
            f.site.spaces = True
            f.site.re_SplitHands = re.compile(r'GAME\s\#')
        m3 = f.site.re_HeroCards1.search(head)
        if m3:
            f.hero = m3.group('PNAME')
        else:
            m4 = f.site.re_HeroCards2.search(head)
            if m4:
                f.hero = m4.group('PNAME')
        return f

    def getFilesForSite(self, sitename, ftype):
        """Return the identified files of type *ftype* belonging to *sitename*."""
        return [
            f for f in self.filelist.values()
            if f.ftype is not None and f.ftype == ftype and f.site.name == sitename
        ]

    def fetchGameTypes(self):
        """Set ``gametype`` on every identified hand-history file.

        Each file's converter is instantiated with autostart disabled, and the
        game type is determined from the contents the converter reads.
        """
        for name, f in self.filelist.items():
            if f.ftype != "hh":
                continue
            # Paths may arrive as bytes; the converter is given text.
            if isinstance(name, bytes):
                name = name.decode("utf8", "replace")
            mod = __import__(f.site.hhc_fname)
            obj = getattr(mod, f.site.filter_name, None)
            hhc = obj(self.config, in_path = name, sitename = f.site.hhc_fname, autostart = False)
            if hhc.readFile():
                f.gametype = hhc.determineGameType(hhc.whole_file)

315 

def main(argv=None):
    """Identify every file under ./regression-test-files and print a report.

    Uses the HUD_config.test.xml configuration and prints the scan duration,
    the loaded site list and the type/converter identified for each file.
    *argv* defaults to the command-line arguments but is not otherwise used.
    """
    if argv is None:
        argv = sys.argv[1:]

    Configuration.set_logfile("fpdb-log.txt")
    config = Configuration.Config(file = "HUD_config.test.xml")
    in_path = os.path.abspath('regression-test-files')
    identifier = IdentifySite(config)
    started = time()
    identifier.scan(in_path)
    print('duration', time() - started)

    print("\n----------- SITE LIST -----------")
    for sid, site in identifier.sitelist.items():
        print("%2d: Name: %s HHC: %s Summary: %s" %(sid, site.name, site.filter_name, site.summary))
    print("----------- END SITE LIST -----------")

    print("\n----------- ID REGRESSION FILES -----------")
    identified = list(identifier.filelist.items())
    for file_path, fobj in identified:
        # Name the converter that will handle the file, when the type is known.
        if fobj.ftype == "hh":
            converter = "Conv: %s" % fobj.site.hhc_fname
        elif fobj.ftype == "summary":
            converter = "Conv: %s" % fobj.site.summary
        else:
            converter = ""
        print(file_path, ": Type: %s " % fobj.ftype + converter)
    print(len(identified), 'files identified')
    print("----------- END ID REGRESSION FILES -----------")

    print("----------- RETRIEVE FOR SINGLE SITE -----------")
    identifier.getFilesForSite("PokerStars", "hh")
    print("----------- END RETRIEVE FOR SINGLE SITE -----------")


if __name__ == '__main__':
    sys.exit(main())