By adding the following to the end of the file data_request.py, you can make this add-on generate images with the OpenAI DALL·E 3 engine. This will replace the default text response with an image-generation result. You need to remove this code if you would like to generate text responses again:
def send_prompt_to_openai_image(prompt):
    """Send *prompt* to the OpenAI DALL-E 3 images endpoint and save the result.

    The generated PNG is written into the Anki collection's media folder, and an
    HTML ``<img>`` snippet plus ``:::plugin:::``-delimited metadata (timestamp,
    revised prompt, file name) is returned for insertion into a note field.

    Returns None on a non-200 HTTP response or on any exception.
    In emulation mode (config ``emulate == 'yes'``) no request is made and a
    fake text response is returned instead.
    """
    config = mw.addonManager.getConfig(__name__)
    if config['emulate'] == 'yes':
        print("Fake request chatgpt: ", prompt)
        return f"This is a fake response for emulation mode for the prompt {prompt}."
    try:
        # NOTE: the original snippet omitted `sys` and `re`, which made the
        # error path (sys.stderr) and the filename sanitizing (re.sub) raise
        # NameError. Import them here alongside the other local imports.
        import sys
        import re
        import requests
        import json
        import base64
        import pathlib
        print("Request to ChatGPT: ", prompt)
        api_key = config['apiKey']
        media_dir = pathlib.Path(mw.col.media.dir())
        # https://platform.openai.com/docs/api-reference/images/create
        url = "https://api.openai.com/v1/images/generations"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}"
        }
        data = {
            "prompt": prompt,
            "n": 1,
            "model": 'dall-e-3',
            "quality": 'hd',
            "response_format": "b64_json",
            "size": "1024x1024"  # Optional: can be adjusted to different sizes like "256x256", "512x512"
        }
        response = requests.post(url, headers=headers, data=json.dumps(data))
        if response.status_code == 200:
            response_json = response.json()
            revised_prompt = response_json["data"][0]["revised_prompt"]
            # Build a filesystem-safe media file name from the (truncated)
            # revised prompt and the response timestamp so repeated
            # generations do not collide.
            file_name = f"{revised_prompt[:100]}-{response_json['created']}"
            invalid_chars = r'[\/:*?"<>|\n]'
            file_name = re.sub(invalid_chars, '', file_name)
            # with open(media_dir / f"{file_name}.json", mode="w", encoding="utf-8") as file:
            #     json.dump(response_json, file)
            image_info = (f"""<img alt="generated image" src="{file_name}.png">"""
                          f"""\n:::plugin:::\n{response_json['created']},\n{revised_prompt},\n{file_name}.png"""
                          )
            print(image_info)
            image_data = base64.b64decode(response_json["data"][0]["b64_json"])
            with open(media_dir / f"{file_name}.png", mode="wb") as png:
                png.write(image_data)
            # dalle 3 has request limit https://platform.openai.com/docs/guides/rate-limits/usage-tiers?context=tier-free
            # check your tier on https://platform.openai.com/settings/organization/limits
            # time.sleep(10.0)
            return image_info
        print(f"Failed to get response: {response.status_code}")
        print(response.text)
        print(data)
        return None
    except Exception as e:
        print(f"An error occurred while processing the note: {str(e)}", file=sys.stderr)
        return None


# Route all text-generation calls through the image generator instead.
send_prompt_to_openai = send_prompt_to_openai_image
Edit:
With this additional patch, the run-prompt dialog also gains a checkbox that lets you choose between image generation and text generation:
diff --git a/1416178071/__init__.py b/1416178071/__init__.py
index 67cf429..0eac73a 100644
--- a/1416178071/__init__.py
+++ b/1416178071/__init__.py
@@ -21,7 +21,7 @@ def get_common_fields(selected_nodes_ids):
note = mw.col.getNote(nid)
note_fields = set(note.keys())
common_fields = common_fields.intersection(note_fields)
- return list(common_fields)
+ return sorted(list(common_fields))
def create_run_prompt_dialog_from_browser(browser, prompt_config):
common_fields = get_common_fields(browser.selectedNotes())
diff --git a/1416178071/data_request.py b/1416178071/data_request.py
index 8595e03..716dd07 100644
--- a/1416178071/data_request.py
+++ b/1416178071/data_request.py
@@ -130,5 +130,3 @@ def send_prompt_to_openai_image(prompt):
print(f"An error occurred while processing the note: {str(e)}", file=sys.stderr)
return None
-
-# send_prompt_to_openai = send_prompt_to_openai_image
diff --git a/1416178071/process_notes.py b/1416178071/process_notes.py
index 0f3fa13..37bfd16 100644
--- a/1416178071/process_notes.py
+++ b/1416178071/process_notes.py
@@ -3,7 +3,7 @@ from PyQt5.QtWidgets import QDialog, QVBoxLayout, QProgressBar, QPushButton, QLa
from aqt import mw
from aqt.utils import showWarning
-from .data_request import create_prompt, send_prompt_to_openai
+from .data_request import create_prompt, send_prompt_to_openai, send_prompt_to_openai_image
from .modify_notes import fill_field_for_note_in_editor, fill_field_for_note_not_in_editor
@@ -71,7 +71,10 @@ class ProgressDialog(QDialog):
def generate_for_single_note(editor, prompt_config):
"""Generate text for a single note (editor note)."""
prompt = create_prompt(editor.note, prompt_config)
- response = send_prompt_to_openai(prompt)
+ if prompt_config["generateImage"]:
+ response = send_prompt_to_openai_image(prompt)
+ else:
+ response = send_prompt_to_openai(prompt)
target_field = prompt_config['targetField']
fill_field_for_note_in_editor(response, target_field, editor)
@@ -81,7 +84,11 @@ def generate_for_multiple_notes(nid, prompt_config):
"""Generate text for multiple notes."""
note = mw.col.get_note(nid)
prompt = create_prompt(note, prompt_config)
- response = send_prompt_to_openai(prompt)
+ if prompt_config["generateImage"]:
+ response = send_prompt_to_openai_image(prompt)
+ else:
+ response = send_prompt_to_openai(prompt)
+
fill_field_for_note_not_in_editor(response, note, prompt_config['targetField'])
diff --git a/1416178071/run_prompt_dialog.py b/1416178071/run_prompt_dialog.py
index fd24225..daee1a3 100644
--- a/1416178071/run_prompt_dialog.py
+++ b/1416178071/run_prompt_dialog.py
@@ -1,6 +1,6 @@
import re
-from PyQt5.QtWidgets import QDialog, QVBoxLayout, QLabel, QPushButton, QTextEdit, QComboBox
+from PyQt5.QtWidgets import QDialog, QVBoxLayout, QLabel, QPushButton, QTextEdit, QComboBox, QCheckBox
from aqt import mw
from aqt.utils import showWarning
@@ -31,6 +31,9 @@ class RunPromptDialog(QDialog):
layout.addWidget(QLabel("Target Field:"))
layout.addWidget(self.target_field_editor)
+ self.enable_image_checkbox = QCheckBox("Enable Image")
+ layout.addWidget(self.enable_image_checkbox)
+
run_button = QPushButton("Run")
run_button.clicked.connect(self.try_to_accept)
@@ -40,6 +43,7 @@ class RunPromptDialog(QDialog):
def try_to_accept(self):
self.prompt_config["prompt"] = self.prompt_editor.toPlainText()
self.prompt_config["targetField"] = self.target_field_editor.currentText()
+ self.prompt_config["generateImage"] = self.enable_image_checkbox.isChecked()
invalid_fields = get_invalid_fields_in_prompt(self.prompt_config["prompt"], self.possible_fields)
if invalid_fields:
You are ChatGPT, a large language model trained by OpenAI based on the GPT-4 architecture. Let’s work this out step by step to make sure we have the right answer. If there is a flaw in my logic, point out the flaw, explain why someone might be mistaken, and explain the correct solution. You will receive a word and three example sentences. Your task is to generate an image that best describes the word by selecting one of the three example sentences. Prioritize choosing an example sentence that portrays a more natural environment, which includes elements such as trees, animals, mountains, rivers, oceans, lakes, plants, flowers, stars, outer space, planets, and other aspects of nature. If none of the sentences can naturally incorporate these elements of nature, select the sentence that best conveys the essence of the word, even if it involves an urban or indoor environment.
Word: {{{Verb}}}
Sentence 1: {{{VerbExample}}}
Sentence 2: {{{PastSimpleExample}}}
Sentence 3: {{{PastParticipleExample}}}