# This is throwaway code.
# First, unzip the epub file.
# Then, for each individual html file, run this script, updating the input and output file names (near the bottom of this file).
# The html parsing is specific to a particular epub, so you will have to change the parsing logic for any other book.

import anthropic

# Anthropic API client; rewrite() sends every LLM request through it.
client = anthropic.Anthropic(
    # defaults to os.environ.get("ANTHROPIC_API_KEY")
    #api_key="or uncomment and put your api key here",
)

# Marker words for heading detection: isheading() treats a line of at most
# 40 characters containing any of these (case-sensitive) as a heading.
wordlist=['BOOK', 'CHAPTER', 'INTRODUCTION','CONTENTS']

def isheading(line):
    """Return True when *line* looks like a heading rather than body text.

    A line counts as a heading when it is at most 40 characters long and
    contains at least one of the marker words in the module-level
    ``wordlist`` (case-sensitive substring match).
    """
    # The length check comes first, so long lines never consult the word list.
    return len(line) <= 40 and any(marker in line for marker in wordlist)

def rewrite(chapter):
    """Ask the LLM to rewrite *chapter* and return the reply as HTML paragraphs.

    Args:
        chapter: Text of one chapter, with paragraphs separated by blank lines.

    Returns:
        A string in which every non-empty paragraph of the reply is wrapped in
        ``<p class="calibre3">...</p>`` and followed by a
        ``<br class="calibre1"/>`` line. Empty string if the reply has no text.
    """
    import sys  # local import: only needed for the truncation warning below

    message = client.messages.create(
        model="claude-sonnet-4-0",
        # 1024 was cutting off some chapters, so made it bigger
        max_tokens=16384,
        messages=[
            # put your LLM prompt here
            {"role": "user", "content": "Re-write the following passage in the style of Authorfirstname Authorlastname:\n\n"+chapter}
        ]
    )

    # A reply that hit the token limit is an incomplete chapter; say so loudly
    # instead of silently writing a cut-off chapter into the book.
    if getattr(message, "stop_reason", None) == "max_tokens":
        print("warning: LLM reply was truncated at max_tokens; "
              "the rewritten chapter is incomplete", file=sys.stderr)

    # Collect every text block of the reply rather than assuming that the
    # first content block exists and is a text block.
    reply_text = ''.join(
        block.text for block in message.content
        if getattr(block, "type", None) == "text"
    )

    # NOTE(review): the reply is inserted into the HTML unescaped, exactly as
    # before. Escaping here could double-escape entities or inline markup that
    # the model carried over from the source text - confirm before changing.
    paragraphs = (part.strip() for part in reply_text.split('\n\n'))
    return ''.join(
        '<p class="calibre3">'+paragraph+'</p>\n<br class="calibre1"/>\n\n'
        for paragraph in paragraphs
        if paragraph  # extra blank lines in the reply must not become empty <p> elements
    )
    
# Input and output file names
# For any other book, you will almost certainly have to change the parsing logic
# This is specific to the epub found here https://standardebooks.org/ebooks/t-e-lawrence/seven-pillars-of-wisdom
# Change INPUT_FILE for each html file of the book; the output is written next to it.
INPUT_FILE = 'The_Seven_Pillars_of_Wisdom_split_000.html'
OUTPUT_FILE = INPUT_FILE + '.mod'

# Opening tag that marks both body paragraphs and headings in this particular epub.
PARAGRAPH_OPEN = '<p class="calibre3">'

# The input is opened first so that a missing input file does not leave an
# empty output file behind.
# The encoding is explicit so non-ASCII text does not depend on the platform
# default; this assumes the epub's html files are UTF-8 - confirm for other books.
with open(INPUT_FILE, 'r', encoding='utf-8') as file:
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as file2:
        # Text of the chapter currently being collected, paragraphs separated by blank lines.
        chapter = ''
        for line in file:
            if PARAGRAPH_OPEN in line:
                newline = line.replace(PARAGRAPH_OPEN, '').replace('</p>', '').strip()
                # check if paragraph or heading
                if isheading(newline):
                    # a heading starts a new chapter, so write out the previous one first
                    if chapter:
                        file2.write(rewrite(chapter))
                    # start new chapter; the heading itself is copied through unchanged
                    chapter = ''
                    file2.write(line+'\n\n')
                else:
                    chapter = chapter+newline+'\n\n'
            elif '<br class="calibre1"/>' not in line:
                # rewrite() adds its own line breaks, so the original ones are dropped
                if '</body>' in line and chapter:
                    # write out current chapter before closing HTML
                    file2.write(rewrite(chapter))
                    chapter = ''
                # write out all other formatting
                file2.write(line)
        # If the file had no </body> line, the last chapter is still pending here.
        if chapter:
            file2.write(rewrite(chapter))
