Kevin Kien: Tra cứu nghĩa tiếng Việt của các từ trong một file văn bản!

import urllib, re, time

# Look up each word of a text file on vdict.com and collect the returned
# result cells into a single HTML report.
# NOTE: this is a Python 2 script -- it relies on urllib.urlopen.

# Raw strings keep the Windows backslashes literal.
textPath = r"E:\LearningProg\Python\english.txt"  # just a test text file
fileName = r"e:\LearningProg\Python\TextTest.html"  # output file in HTML form

charset = "[a-zA-Z]+"  # set of characters that make up a word

maxWords = 100  # example run: only the first 100 words are looked up
wordsPerBatch = 5  # number of lookups between two pauses
pauseSeconds = 60  # pause length, waiting for the vdict.com server

resultOpenTag = """<td class="resultContent">"""
resultCloseTag = "</td>"


def _extractDefinition(pageText):
    """Return the result cell of a vdict.com page, tags included.

    The cell runs from the opening ``<td class="resultContent">`` up to and
    including the first ``</td>`` that follows it.  An empty string is
    returned when either tag is missing, so a failed lookup never injects a
    stray fragment of the page into the report.
    """
    startIndex = pageText.find(resultOpenTag)
    if startIndex == -1:
        return ""
    closeIndex = pageText.find(resultCloseTag, startIndex)
    if closeIndex == -1:
        return ""
    return pageText[startIndex:closeIndex + len(resultCloseTag)]


# Read the whole document; the file is closed as soon as the block exits.
with open(textPath, "r") as textFile:
    textDoc = textFile.read()

words = re.findall(charset, textDoc)  # find all words in the text
selectedWords = words[0:maxWords]

bodyParts = []
for count, theWord in enumerate(selectedWords, 1):
    lowerWord = theWord.lower()

    # Open the HTML page for this word on vdict.com.
    wordDict = urllib.urlopen("http://vdict.com/" + lowerWord + ",1,0,0.html")
    try:
        wordDictText = wordDict.read()
    finally:
        # Release the connection even if reading fails.
        wordDict.close()

    wordDef = _extractDefinition(wordDictText)

    # Single parenthesised argument: prints the same text on Python 2 and 3.
    print("writing word no.%s" % count)

    bodyParts.append("<p>" + "<h1>" + str(count) + " --> " + lowerWord + "</h1>" + "</p>")
    bodyParts.append("<table><tr>" + wordDef + "</tr></table>")

    # Pause after every full batch of lookups, but not after the last word,
    # where there is nothing left to wait for.
    if count % wordsPerBatch == 0 and count < len(selectedWords):
        time.sleep(pauseSeconds)

htmlBody = "".join(bodyParts)

# file.name is still readable after the file has been closed.
htmlTitle = "<title>" + "Nghia cua cac tu trong " + textFile.name.lower() + "</title>"
htmlCode = "<html>" + htmlTitle + "<body>" + htmlBody + "</body>" + "</html>"

with open(fileName, "w") as outFile:
    outFile.write(htmlCode)

Tra cứu nghĩa tiếng Việt của các từ trong một file văn bản!

Blog Archive

Labels