diff --git a/fs_attachment/README.rst b/fs_attachment/README.rst new file mode 100644 index 0000000000..28c2e4bfc6 --- /dev/null +++ b/fs_attachment/README.rst @@ -0,0 +1,349 @@
+============================
+Base Attachment Object Store
+============================
+
+.. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+   !! This file is generated by oca-gen-addon-readme !!
+   !! changes will be overwritten.                   !!
+   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+.. |badge1| image:: https://img.shields.io/badge/maturity-Beta-yellow.png
+    :target: https://odoo-community.org/page/development-status
+    :alt: Beta
+.. |badge2| image:: https://img.shields.io/badge/licence-AGPL--3-blue.png
+    :target: http://www.gnu.org/licenses/agpl-3.0-standalone.html
+    :alt: License: AGPL-3
+.. |badge3| image:: https://img.shields.io/badge/github-OCA%2Fstorage-lightgray.png?logo=github
+    :target: https://github.com/OCA/storage/tree/16.0/fs_attachment
+    :alt: OCA/storage
+.. |badge4| image:: https://img.shields.io/badge/weblate-Translate%20me-F47D42.png
+    :target: https://translation.odoo-community.org/projects/storage-16-0/storage-16-0-fs_attachment
+    :alt: Translate me on Weblate
+.. |badge5| image:: https://img.shields.io/badge/runbot-Try%20me-875A7B.png
+    :target: https://runbot.odoo-community.org/runbot/275/16.0
+    :alt: Try me on Runbot
+
+|badge1| |badge2| |badge3| |badge4| |badge5|
+
+In some cases, you need to store attachments in another system than Odoo's
+filestore. For example, when your deployment is based on a multi-server
+architecture to ensure redundancy and scalability, your attachments must
+be stored in a way that makes them accessible from all the servers. To do so,
+you can use a shared storage system like NFS, an S3 compatible object storage,
+and so on.
+
+This addon extends the storage mechanism of Odoo's attachments to allow
+you to store them in any storage filesystem supported by the Python
+library `fsspec <https://filesystem-spec.readthedocs.io/en/latest/>`_ and made
+available via the `fs_storage` addon.
+
+In contrast to Odoo, when a file is stored in an external storage, this
+addon ensures that the filename keeps its meaning (in Odoo the filename
+in the filestore is the checksum of the file content). Concretely, the filename
+is based on the pattern:
+'<name>-<id>-<version>.<extension>'
+
+This addon also adds two new fields on attachments that can be used
+to retrieve the file content from a URL:
+
+* ``Internal URL``: URL to retrieve the file content from the Odoo
+  filestore.
+* ``Filesystem URL``: URL to retrieve the file content from the external
+  storage.
+
+.. note::
+
+    The internal URL is always available, but the filesystem URL is only
+    available when the attachment is stored in an external storage.
+    Particular attention has been paid to limit as much as possible the
+    resources needed to serve, via Odoo, content stored in an external
+    filesystem. By default, the implementation streams the content end to end
+    between the external filesystem and the Odoo client application.
+    Nevertheless, if your content is available via a URL on the external filesystem,
+    you can configure the storage to use the x-sendfile mechanism to serve the
+    content if it's activated on your Odoo instance. In this case, the content
+    served by Odoo at the internal URL will be proxied to the filesystem URL
+    by nginx.
+
+Last but not least, the addon adds a new method `open` on the attachment. This
+method allows you to open the attachment as a file.
+For attachments stored in
+the filestore or in an external filesystem, it allows you to directly read from
+and write to the file and therefore minimize memory consumption, since data
+are not kept in memory before being written to the database.
+
+**Table of contents**
+
+.. contents::
+   :local:
+
+Usage
+=====
+
+Configuration
+~~~~~~~~~~~~~
+
+The configuration is done through the creation of a filesystem storage record
+in Odoo. To create a new storage, go to the menu
+``Settings > Technical > FS Storage`` and click on ``Create``.
+
+In addition to the common fields available to configure a storage, specific
+fields are available under the section 'Attachment' to configure the way
+attachments will be stored in the filesystem.
+
+* ``Optimizes Directory Path``: This option is useful if you need to prevent
+  having too many files in a single directory. It will create a directory
+  structure based on the attachment's checksum (with 2 levels of depth).
+  For example, if the checksum is ``123456789``, the file will be stored in the
+  directory ``/path/to/storage/12/34/my_file-1-0.txt``.
+* ``Autovacuum GC``: This is used to automatically remove files from the filesystem
+  when they are no longer referenced in Odoo. Some storage backends (like S3) may
+  charge you for the storage of files, so it's important to remove them when
+  they're no longer needed. In some cases, this option is not desirable, for
+  example if you're using a storage backend to store images shared with other
+  systems (like your website) and you don't want to remove the files from the
+  storage while they're still referenced by the other systems.
+  This mechanism is based on a ``fs.file.gc`` model used to collect the files
+  to remove. This model is automatically populated by the ``ir.attachment``
+  model when a file is removed from the database. If you disable this option,
+  you'll have to manually take care of the records in ``fs.file.gc`` for
+  your filesystem storage.
+* ``Use As Default For Attachment``: This option allows you to declare the storage
+  as the default one for attachments. If you have multiple filesystem storages
+  configured, you can choose which one will be used by default for attachments.
+  Once activated, attachments created without specifying a storage will be
+  stored in this default storage.
+* ``Force DB For Default Attachment Rules``: This option is useful if you want to
+  force the storage of some attachments in the database, even if you have a
+  default filesystem storage configured. This is especially useful when you're
+  using a storage backend like S3, where the network latency can be
+  high. This option is a JSON field that allows you to define the mimetypes and
+  the size limit below which the attachments will be stored in the database.
+
+  Small images (128, 256) are used in Odoo in list / kanban views. We
+  want them to be fast to read.
+  They are generally < 50KB (default configuration) so they don't take
+  that much space in database, but they'll be read much faster than from
+  the object storage.
+
+  The assets (application/javascript, text/css) are stored in database
+  as well, whatever their size is:
+
+  * a database doesn't have thousands of them
+  * of course better for performance
+  * better portability of a database: when replicating a production
+    instance for dev, the assets are included
+
+  The default configuration is:
+
+  {"image/": 51200, "application/javascript": 0, "text/css": 0}
+
+  where the key is the beginning of the mimetype to configure and the
+  value is the size limit below which attachments are kept in DB.
+  0 means no limit.
+
+  The default configuration means:
+
+  * image mimetypes (image/png, image/jpeg, ...) below 50KB are
+    stored in database
+  * application/javascript files are stored in database whatever their size
+  * text/css files are stored in database whatever their size
+
+  This option is only available on the filesystem storage that is used
+  as the default for attachments.
+
+Another key feature of this module is the ability to access the attachments
+from URLs.
+
+* ``Base URL``: This is the base URL used to access the attachments from the
+  filesystem storage itself. If your storage doesn't provide a way to access
+  the files from a URL, you can leave this field empty.
+* ``Is Directory Path In URL``: Normally the directory path configured on the storage
+  is not included in the URL. If you want to include it, you can activate this option.
+* ``Use X-Sendfile To Serve Internal Url``: If checked and Odoo is behind a proxy
+  that supports x-sendfile, the content served by the attachment's internal URL
+  will be served by the proxy using the filesystem URL path if defined (this field
+  is available on the attachment if the storage is configured with a base URL).
+  If not, the file will be served by Odoo, which will stream the content read from
+  the filesystem storage. This option is useful to avoid serving files from Odoo
+  and therefore to avoid loading the Odoo process.
+
+  To be fully functional, this option requires the proxy to support x-sendfile
+  (apache) or x-accel-redirect (nginx). You must also configure your proxy by
+  adding, for each storage, a rule to redirect the URL rooted at the 'storage code'
+  to the server serving the files. For example, if you have a storage with the
+  code 'my_storage' and a server serving the files at the URL 'http://myserver.com',
+  you must add the following rule to your proxy configuration:
+
+  .. code-block:: nginx
+
+      location /my_storage/ {
+          internal;
+          proxy_pass http://myserver.com;
+      }
+
+  With this configuration, a call to '/web/content/<attachment id>/<file name>'
+  for a file stored in the 'my_storage' storage will generate a response by Odoo
+  with the URI
+  ``/my_storage/<path>/<name>-<id>-<version>.<extension>``
+  in the headers ``X-Accel-Redirect`` and ``X-Sendfile``, and the proxy will redirect to
+  ``http://myserver.com/<path>/<name>-<id>-<version>.<extension>``.
+
+  See https://www.nginx.com/resources/wiki/start/topics/examples/x-accel/ for more
+  information.
+
+* ``Use Filename Obfuscation``: If checked, the filename used to store the content
+  in the filesystem storage will be obfuscated. This is useful to avoid
+  exposing the real filename of the attachments outside of the Odoo database.
+  The filename will be obfuscated by using the checksum of the content. This option
+  should be avoided when the content of your filestore is shared with other systems
+  (like your website) and you want to keep a meaningful filename to ensure
+  good SEO. This option is disabled by default.
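+
+The same configuration can also be done from code, for example in a setup script
+or a test. The sketch below is illustrative only: it assumes the generic connection
+fields (``name``, ``code``, ``protocol``, ``options``, ``directory_path``) exposed by
+the ``fs_storage`` addon, combined with the attachment-related fields described above;
+the endpoint, key and bucket values are placeholders.
+
+.. code-block:: python
+
+    # Illustrative sketch: declare an S3-like storage as the default for attachments.
+    storage = self.env["fs.storage"].create(
+        {
+            "name": "Attachments (S3)",
+            "code": "my_storage",
+            "protocol": "s3",
+            "options": '{"endpoint_url": "https://my_s3_server/", "key": "KEY", "secret": "SECRET"}',
+            "directory_path": "my_bucket",
+            "optimizes_directory_path": True,
+            "use_as_default_for_attachments": True,
+            # keep small images and assets in the database (module default rules)
+            "force_db_for_default_attachment_rules": '{"image/": 51200, "application/javascript": 0, "text/css": 0}',
+        }
+    )
+    # New attachments created without an explicit storage now go to 'my_storage'.
+    attachment = self.env["ir.attachment"].create({"name": "report.txt", "raw": b"hello"})
+    # attachment.fs_storage_id -> the storage above
+    # attachment.fs_url -> public URL (only if a base URL is configured)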
+
+
+Server Environment
+~~~~~~~~~~~~~~~~~~
+
+When you configure a storage through the use of a server environment file, you can
+provide values for the following keys:
+
+* ``optimizes_directory_path``
+* ``autovacuum_gc``
+* ``base_url``
+* ``is_directory_path_in_url``
+* ``use_x_sendfile_to_serve_internal_url``
+* ``use_as_default_for_attachments``
+* ``force_db_for_default_attachment_rules``
+* ``use_filename_obfuscation``
+
+For example, the configuration of a storage with code `fsprod` used to store
+the attachments by default could be:
+
+.. code-block:: ini
+
+    [fs_storage.fsprod]
+    protocol=s3
+    options={"endpoint_url": "https://my_s3_server/", "key": "KEY", "secret": "SECRET"}
+    directory_path=my_bucket
+    use_as_default_for_attachments=True
+    use_filename_obfuscation=True
+
+Advanced usage: Using attachment as a file
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The `open` method on the attachment can be used to open and manipulate the attachment
+as a file object. The object returned by the call to the method implements the
+methods of ``io.IOBase``. The method can be called as any other python method.
+In such a case, it's your responsibility to close the file at the end of your
+process.
+
+.. code-block:: python
+
+    attachment = self.env["ir.attachment"].create({"name": "test.txt"})
+    the_file = attachment.open("wb")
+    try:
+        the_file.write(b"content")
+    finally:
+        the_file.close()
+
+The result of the call to `open` also works in a ``with`` context block. In such
+a case, when the code exits the block, the file is automatically closed.
+
+.. code-block:: python
+
+    attachment = self.env["ir.attachment"].create({"name": "test.txt"})
+    with attachment.open("wb") as the_file:
+        the_file.write(b"content")
+
+It's always safer to prefer the second approach.
+
+When your attachment is stored in the Odoo filestore or in an external
+filesystem storage, each time you call the open method a new file is created.
+This ensures that if the transaction is rolled back, the original content
+is preserved. Nevertheless, you could have use cases where you would like to write
+to the existing file directly. For example, you could create an empty attachment
+to store a csv report and then use the `open` method to write your content directly
+into the new file. To support this kind of use case, the parameter `new_version`
+can be passed as `False` to avoid the creation of a new file.
+
+.. code-block:: python
+
+    attachment = self.env["ir.attachment"].create({"name": "test.txt"})
+    with attachment.open("w", new_version=False) as f:
+        writer = csv.writer(f, delimiter=";")
+        ....
+
+
+Tips & Tricks
+~~~~~~~~~~~~~
+
+* When working in multi-staging environments, the management of the attachments
+  can be tricky. For example, if you have a production instance and a staging
+  instance based on a backup of the production environment, you may want to have
+  the attachments shared between the two instances BUT you don't want to have
+  one instance removing or modifying the attachments of the other instance.
+
+  To do so, you can add on your staging instances a new storage and declare it
+  as the default storage to use for attachments. This way, all the new attachments
+  will be stored in this new storage, but the attachments created on the production
+  instance will still be read from the production storage. Be careful to adapt the
+  configuration of the storage pointing to the production environment to make it
+  read-only. (The use of server environment files is a good way to do so.)
+
+Bug Tracker
+===========
+
+Bugs are tracked on `GitHub Issues <https://github.com/OCA/storage/issues>`_.
+In case of trouble, please check there if your issue has already been reported. +If you spotted it first, help us smashing it by providing a detailed and welcomed +`feedback `_. + +Do not contact contributors directly about support or help with technical issues. + +Credits +======= + +Authors +~~~~~~~ + +* Camptocamp +* ACSONE SA/NV + +Contributors +~~~~~~~~~~~~ + +Thierry Ducrest +Guewen Baconnier +Julien Coux +Akim Juillerat +Thomas Nowicki +Vincent Renaville +Denis Leemann +Patrick Tombez +Don Kendall +Stephane Mangin +Laurent Mignon + +Maintainers +~~~~~~~~~~~ + +This module is maintained by the OCA. + +.. image:: https://odoo-community.org/logo.png + :alt: Odoo Community Association + :target: https://odoo-community.org + +OCA, or the Odoo Community Association, is a nonprofit organization whose +mission is to support the collaborative development of Odoo features and +promote its widespread use. + +.. |maintainer-lmignon| image:: https://github.com/lmignon.png?size=40px + :target: https://github.com/lmignon + :alt: lmignon + +Current `maintainer `__: + +|maintainer-lmignon| + +This module is part of the `OCA/storage `_ project on GitHub. + +You are welcome to contribute. To learn how please visit https://odoo-community.org/page/Contribute. diff --git a/fs_attachment/__init__.py b/fs_attachment/__init__.py new file mode 100644 index 0000000000..6d58305f5d --- /dev/null +++ b/fs_attachment/__init__.py @@ -0,0 +1,2 @@ +from . import models +from .hooks import pre_init_hook diff --git a/fs_attachment/__manifest__.py b/fs_attachment/__manifest__.py new file mode 100644 index 0000000000..3a4deeacb9 --- /dev/null +++ b/fs_attachment/__manifest__.py @@ -0,0 +1,24 @@ +# Copyright 2017-2021 Camptocamp SA +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html) + + +{ + "name": "Base Attachment Object Store", + "summary": "Store attachments on external object store", + "version": "16.0.1.0.0", + "author": "Camptocamp, ACSONE SA/NV, Odoo Community Association (OCA)", + "license": "AGPL-3", + "development_status": "Beta", + "category": "Knowledge Management", + "depends": ["fs_storage"], + "website": "https://github.com/OCA/storage", + "data": [ + "security/fs_file_gc.xml", + "views/fs_storage.xml", + ], + "external_dependencies": {"python": ["python_slugify"]}, + "installable": True, + "auto_install": False, + "maintainers": ["lmignon"], + "pre_init_hook": "pre_init_hook", +} diff --git a/fs_attachment/fs_stream.py b/fs_attachment/fs_stream.py new file mode 100644 index 0000000000..fafc233892 --- /dev/null +++ b/fs_attachment/fs_stream.py @@ -0,0 +1,100 @@ +# Copyright 2023 ACSONE SA/NV +# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl). 
+from __future__ import annotations + +from odoo.http import STATIC_CACHE_LONG, Response, Stream, request +from odoo.tools import config + +from .models.ir_attachment import IrAttachment + +try: + from werkzeug.utils import send_file as _send_file +except ImportError: + from odoo.tools._vendor.send_file import send_file as _send_file + + +class FsStream(Stream): + fs_attachment = None + + @classmethod + def from_fs_attachment(cls, attachment: IrAttachment) -> FsStream: + attachment.ensure_one() + if not attachment.fs_filename: + raise ValueError("Attachment is not stored into a filesystem storage") + size = 0 + if cls._check_use_x_sendfile(attachment): + fs, _storage, fname = attachment._get_fs_parts() + fs_info = fs.info(fname) + size = fs_info["size"] + return cls( + mimetype=attachment.mimetype, + download_name=attachment.name, + conditional=True, + etag=attachment.checksum, + type="fs", + size=size, + last_modified=attachment["__last_update"], + fs_attachment=attachment, + ) + + def read(self): + if self.type == "fs": + with self.fs_attachment.open("rb") as f: + return f.read() + return super().read() + + def get_response(self, as_attachment=None, immutable=None, **send_file_kwargs): + if self.type != "fs": + return super().get_response( + as_attachment=as_attachment, immutable=immutable, **send_file_kwargs + ) + if as_attachment is None: + as_attachment = self.as_attachment + if immutable is None: + immutable = self.immutable + send_file_kwargs = { + "mimetype": self.mimetype, + "as_attachment": as_attachment, + "download_name": self.download_name, + "conditional": self.conditional, + "etag": self.etag, + "last_modified": self.last_modified, + "max_age": STATIC_CACHE_LONG if immutable else self.max_age, + "environ": request.httprequest.environ, + "response_class": Response, + **send_file_kwargs, + } + use_x_sendfile = self._fs_use_x_sendfile + # The file will be closed by werkzeug... + send_file_kwargs["use_x_sendfile"] = use_x_sendfile + if not use_x_sendfile: + f = self.fs_attachment.open("rb") + res = _send_file(f, **send_file_kwargs) + else: + x_accel_redirect = ( + f"/{self.fs_attachment.fs_storage_code}{self.fs_attachment.fs_url_path}" + ) + send_file_kwargs["use_x_sendfile"] = True + res = _send_file("", **send_file_kwargs) + # nginx specific headers + res.headers["X-Accel-Redirect"] = x_accel_redirect + # apache specific headers + res.headers["X-Sendfile"] = x_accel_redirect + res.headers["Content-Length"] = 0 + + if immutable and res.cache_control: + res.cache_control["immutable"] = None + return res + + @classmethod + def _check_use_x_sendfile(cls, attachment: IrAttachment) -> bool: + return ( + config["x_sendfile"] + and attachment.fs_url + and attachment.fs_storage_id.use_x_sendfile_to_serve_internal_url + ) + + @property + def _fs_use_x_sendfile(self) -> bool: + """Return True if x-sendfile should be used to serve the file""" + return self._check_use_x_sendfile(self.fs_attachment) diff --git a/fs_attachment/hooks.py b/fs_attachment/hooks.py new file mode 100644 index 0000000000..bbb464389a --- /dev/null +++ b/fs_attachment/hooks.py @@ -0,0 +1,33 @@ +# Copyright 2023 ACSONE SA/NV +# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl). 
+import logging + +_logger = logging.getLogger(__name__) + + +def pre_init_hook(cr): + """Pre init hook.""" + # add columns for computed fields to avoid useless computation by the ORM + # when installing the module + _logger.info("Add columns for computed fields on ir_attachment") + cr.execute( + """ + ALTER TABLE ir_attachment + ADD COLUMN fs_storage_id INTEGER; + ALTER TABLE ir_attachment + ADD FOREIGN KEY (fs_storage_id) REFERENCES fs_storage(id); + """ + ) + cr.execute( + """ + ALTER TABLE ir_attachment + ADD COLUMN fs_url VARCHAR; + """ + ) + cr.execute( + """ + ALTER TABLE ir_attachment + ADD COLUMN fs_storage_code VARCHAR; + """ + ) + _logger.info("Columns added on ir_attachment") diff --git a/fs_attachment/models/__init__.py b/fs_attachment/models/__init__.py new file mode 100644 index 0000000000..bfe56d2fda --- /dev/null +++ b/fs_attachment/models/__init__.py @@ -0,0 +1,4 @@ +from . import fs_file_gc +from . import fs_storage +from . import ir_attachment +from . import ir_binary diff --git a/fs_attachment/models/fs_file_gc.py b/fs_attachment/models/fs_file_gc.py new file mode 100644 index 0000000000..6ab70ec38e --- /dev/null +++ b/fs_attachment/models/fs_file_gc.py @@ -0,0 +1,168 @@ +# Copyright 2023 ACSONE SA/NV +# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl). +import logging +import threading +from contextlib import closing, contextmanager + +from odoo import api, fields, models +from odoo.sql_db import Cursor + +_logger = logging.getLogger(__name__) + + +class FsFileGC(models.Model): + + _name = "fs.file.gc" + _description = "Filesystem storage file garbage collector" + + store_fname = fields.Char("Stored Filename") + fs_storage_code = fields.Char("Storage Code") + + _sql_constraints = [ + ( + "store_fname_uniq", + "unique (store_fname)", + "The stored filename must be unique!", + ), + ] + + def _is_test_mode(self) -> bool: + """Return True if we are running the tests, so we do not mark files for + garbage collection into a separate transaction. + """ + return ( + getattr(threading.current_thread(), "testing", False) + or self.env.registry.in_test_mode() + ) + + @contextmanager + def _in_new_cursor(self) -> Cursor: + """Context manager to execute code in a new cursor""" + if self._is_test_mode() or not self.env.registry.ready: + yield self.env.cr + return + + with closing(self.env.registry.cursor()) as cr: + try: + yield cr + except Exception: + cr.rollback() + raise + else: + # disable pylint error because this is a valid commit, + # we are in a new env + cr.commit() # pylint: disable=invalid-commit + + @api.model + def _mark_for_gc(self, store_fname: str) -> None: + """Mark a file for garbage collection" + + This process is done in a separate transaction since the data must be + preserved even if the transaction is rolled back. + """ + with self._in_new_cursor() as cr: + code = store_fname.partition("://")[0] + # use plain SQL to avoid the ORM ignore conflicts errors + cr.execute( + """ + INSERT INTO + fs_file_gc ( + store_fname, + fs_storage_code, + create_date, + write_date, + create_uid, + write_uid + ) + VALUES ( + %s, + %s, + now() at time zone 'UTC', + now() at time zone 'UTC', + %s, + %s + ) + ON CONFLICT DO NOTHING + """, + (store_fname, code, self.env.uid, self.env.uid), + ) + + @api.autovacuum + def _gc_files(self) -> None: + """Garbage collect files""" + # This method is mainly a copy of the method _gc_file_store_unsafe() + # from the module fs_attachment. 
The only difference is that the list + # of files to delete is retrieved from the table fs_file_gc instead + # of the odoo filestore. + + # Continue in a new transaction. The LOCK statement below must be the + # first one in the current transaction, otherwise the database snapshot + # used by it may not contain the most recent changes made to the table + # ir_attachment! Indeed, if concurrent transactions create attachments, + # the LOCK statement will wait until those concurrent transactions end. + # But this transaction will not see the new attachements if it has done + # other requests before the LOCK (like the method _storage() above). + cr = self._cr + cr.commit() # pylint: disable=invalid-commit + + # prevent all concurrent updates on ir_attachment and fs_file_gc + # while collecting, but only attempt to grab the lock for a little bit, + # otherwise it'd start blocking other transactions. + # (will be retried later anyway) + cr.execute("SET LOCAL lock_timeout TO '10s'") + cr.execute("LOCK fs_file_gc IN SHARE MODE") + cr.execute("LOCK ir_attachment IN SHARE MODE") + + self._gc_files_unsafe() + + # commit to release the lock + cr.commit() # pylint: disable=invalid-commit + + def _gc_files_unsafe(self) -> None: + # get the list of fs.storage codes that must be autovacuumed + codes = ( + self.env["fs.storage"].search([]).filtered("autovacuum_gc").mapped("code") + ) + if not codes: + return + # we process by batch of storage codes. + self._cr.execute( + """ + SELECT + fs_storage_code, + array_agg(store_fname) + + FROM + fs_file_gc + WHERE + fs_storage_code IN %s + AND NOT EXISTS ( + SELECT 1 + FROM ir_attachment + WHERE store_fname = fs_file_gc.store_fname + ) + GROUP BY + fs_storage_code + """, + (tuple(codes),), + ) + for code, store_fnames in self._cr.fetchall(): + self.env["fs.storage"].get_by_code(code) + fs = self.env["fs.storage"].get_fs_by_code(code) + for store_fname in store_fnames: + try: + file_path = store_fname.partition("://")[2] + fs.rm(file_path) + except Exception: + _logger.debug("Failed to remove file %s", store_fname) + + # delete the records from the table fs_file_gc + self._cr.execute( + """ + DELETE FROM + fs_file_gc + WHERE + fs_storage_code IN %s + """, + (tuple(codes),), + ) diff --git a/fs_attachment/models/fs_storage.py b/fs_attachment/models/fs_storage.py new file mode 100644 index 0000000000..60203e865a --- /dev/null +++ b/fs_attachment/models/fs_storage.py @@ -0,0 +1,275 @@ +# Copyright 2023 ACSONE SA/NV +# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl). + +from odoo import _, api, fields, models, tools +from odoo.exceptions import ValidationError +from odoo.tools.safe_eval import const_eval + +from .ir_attachment import IrAttachment + + +class FsStorage(models.Model): + + _inherit = "fs.storage" + + optimizes_directory_path = fields.Boolean( + help="If checked, the directory path will be optimized to avoid " + "too much files into the same directory. This options is used when the " + "storage is used to store attachments. Depending on the storage, this " + "option can be ignored. It's useful for storage based on real file. " + "This way, files with similar properties will be stored in the same " + "directory, avoiding overcrowding in the root directory and optimizing " + "access times." + ) + autovacuum_gc = fields.Boolean( + string="Autovacuum Garbage Collection", + default=True, + help="If checked, the autovacuum of the garbage collection will be " + "automatically executed when the storage is used to store attachments. 
" + "Sometime, the autovacuum is to avoid when files in the storage are referenced " + "by other systems (like a website). In such case, records in the fs.file.gc " + "table must be manually processed.", + ) + base_url = fields.Char(default="") + is_directory_path_in_url = fields.Boolean( + default=False, + help="Normally the directory_path is for internal usage. " + "If this flag is enabled the path will be used to compute the " + "public URL.", + ) + base_url_for_files = fields.Char(compute="_compute_base_url_for_files", store=True) + use_x_sendfile_to_serve_internal_url = fields.Boolean( + string="Use X-Sendfile To Serve Internal Url", + help="If checked and odoo is behind a proxy that supports x-sendfile, " + "the content served by the attachment's internal URL will be served" + "by the proxy using the fs_url if defined. If not, the file will be " + "served by odoo that will stream the content read from the filesystem " + "storage. This option is useful to avoid to serve files from odoo " + "and therefore to avoid to load the odoo process. ", + ) + use_as_default_for_attachments = fields.Boolean( + help="If checked, this storage will be used to store all the attachments ", + default=False, + ) + force_db_for_default_attachment_rules = fields.Text( + help="When storing attachments in an external storage, storage may be slow." + "If the storage is used to store odoo attachments by default, this could lead " + "to a bad user experience since small images (128, 256) are used in Odoo " + "in list / kanban views. We want them to be fast to read." + "This field allows to force the store of some attachments in the odoo " + "database. The value is a dict Where the key is the beginning of the " + "mimetype to configure and the value is the limit in size below which " + "attachments are kept in DB. 0 means no limit.\n" + "Default configuration means:\n" + "* images mimetypes (image/png, image/jpeg, ...) below 50KB are stored " + "in database\n" + "* application/javascript are stored in database whatever their size \n" + "* text/css are stored in database whatever their size", + default=lambda self: self._default_force_db_for_default_attachment_rules, + ) + use_filename_obfuscation = fields.Boolean( + help="If checked, the filename will be obfuscated. This option is " + "useful to avoid to expose sensitive information trough the URL " + "or in the remote storage. The obfuscation is done using a hash " + "of the filename. The original filename is stored in the attachment " + "metadata. 
The obfusation is to avoid if the storage is used to store " + "files that are referenced by other systems (like a website) where " + "the filename is important for SEO.", + ) + + @api.constrains("use_as_default_for_attachments") + def _check_use_as_default_for_attachments(self): + # constrains are checked in python since values can be provided by + # the server environment + defaults = self.search([]).filtered("use_as_default_for_attachments") + if len(defaults) > 1: + raise ValidationError( + _("Only one storage can be used as default for attachments") + ) + + @property + def _server_env_fields(self): + env_fields = super()._server_env_fields + env_fields.update( + { + "optimizes_directory_path": {}, + "autovacuum_gc": {}, + "base_url": {}, + "is_directory_path_in_url": {}, + "use_x_sendfile_to_serve_internal_url": {}, + "use_as_default_for_attachments": {}, + "force_db_for_default_attachment_rules": {}, + "use_filename_obfuscation": {}, + } + ) + return env_fields + + @property + def _default_force_db_for_default_attachment_rules(self) -> str: + return '{"image/": 51200, "application/javascript": 0, "text/css": 0}' + + @api.onchange("use_as_default_for_attachments") + def _onchange_use_as_default_for_attachments(self): + if not self.use_as_default_for_attachments: + self.force_db_for_default_attachment_rules = "" + else: + self.force_db_for_default_attachment_rules = ( + self._default_force_db_for_default_attachment_rules + ) + + @api.model_create_multi + def create(self, vals_list): + for vals in vals_list: + if not vals.get("use_as_default_for_attachments"): + vals["force_db_for_default_attachment_rules"] = None + return super().create(vals_list) + + def write(self, vals): + if "use_as_default_for_attachments" in vals: + if not vals["use_as_default_for_attachments"]: + vals["force_db_for_default_attachment_rules"] = None + return super().write(vals) + return super().write(vals) + + @api.constrains( + "force_db_for_default_attachment_rules", "use_as_default_for_attachments" + ) + def _check_force_db_for_default_attachment_rules(self): + for rec in self: + if not rec.force_db_for_default_attachment_rules: + continue + if not rec.use_as_default_for_attachments: + raise ValidationError( + _( + "The force_db_for_default_attachment_rules can only be set " + "if the storage is used as default for attachments." + ) + ) + try: + const_eval(rec.force_db_for_default_attachment_rules) + except (SyntaxError, TypeError, ValueError) as e: + raise ValidationError( + _( + "The force_db_for_default_attachment_rules is not a valid " + "python dict." + ) + ) from e + + @api.model + @tools.ormcache() + def get_default_storage_code_for_attachments(self): + """Return the code of the storage to use to store by default the attachments""" + storages = self.search([]).filtered_domain( + [("use_as_default_for_attachments", "=", True)] + ) + if storages: + return storages[0].code + return None + + @api.model + @tools.ormcache("code") + def get_force_db_for_default_attachment_rules(self, code): + """Return the rules to force the storage of some attachments in the DB + + :param code: the code of the storage + :return: a dict where the key is the beginning of the mimetype to configure + and the value is the limit in size below which attachments are kept in DB. + 0 means no limit. 
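+
+        Example of returned value (the module's default rules, as documented
+        in the README)::
+
+            {"image/": 51200, "application/javascript": 0, "text/css": 0}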
+ """ + storage = self.get_by_code(code) + if storage and storage.force_db_for_default_attachment_rules: + return const_eval(storage.force_db_for_default_attachment_rules) + return {} + + @api.model + @tools.ormcache("code") + def _must_optimize_directory_path(self, code): + return self.get_by_code(code).optimizes_directory_path + + @api.model + @tools.ormcache("code") + def _must_autovacuum_gc(self, code): + return self.get_by_code(code).autovacuum_gc + + @api.model + @tools.ormcache("code") + def _must_use_filename_obfuscation(self, code): + return self.get_by_code(code).use_filename_obfuscation + + @api.depends("base_url", "is_directory_path_in_url") + def _compute_base_url_for_files(self): + for rec in self: + if not rec.base_url: + rec.base_url_for_files = "" + continue + parts = [rec.base_url] + if rec.is_directory_path_in_url and rec.directory_path: + parts.append(rec.directory_path) + rec.base_url_for_files = self._normalize_url("/".join(parts)) + + @api.model + def _get_url_for_attachment( + self, attachment: IrAttachment, exclude_base_url: bool = False + ) -> str | None: + """Return the URL to access the attachment + + :param attachment: an attachment record + :return: the URL to access the attachment + """ + fs_storage = self.get_by_code(attachment.fs_storage_code) + if not fs_storage: + return None + base_url = fs_storage.base_url_for_files + if not base_url: + return None + if exclude_base_url: + base_url = base_url.replace(fs_storage.base_url.rstrip("/"), "") or "/" + # always remove the directory_path from the fs_filename + # only if it's at the start of the filename + fs_filename = attachment.fs_filename + if fs_filename.startswith(fs_storage.directory_path): + fs_filename = fs_filename.replace(fs_storage.directory_path, "") + parts = [base_url, fs_filename] + return self._normalize_url("/".join(parts)) + + @api.model + def _normalize_url(self, url: str) -> str: + """Normalize the URL + + :param url: the URL to normalize + :return: the normalized URL + remove all the double slashes and the trailing slash except if the URL + is only a slash (in this case we return a single slash). Avoid to remove + the double slash in the protocol part of the URL. + """ + if url == "/": + return url + parts = url.split("/") + parts = [x for x in parts if x] + if not parts: + return "/" + if parts[0].endswith(":"): + parts[0] = parts[0] + "/" + else: + # we preserve the trailing slash if the URL is absolute + parts[0] = "/" + parts[0] + return "/".join(parts) + + def recompute_urls(self) -> None: + """Recompute the URL of all attachments since the base_url or the + directory_path has changed. This method must be explicitly called + by the user since we don't want to recompute the URL on each change + of the base_url or directory_path. We could also have cases where such + a recompute is not wanted. For example, when you restore a database + from production to staging, you don't want to recompute the URL of + the attachments created in production (since the directory_path use + in production is readonly for the staging database) but you change the + directory_path of the staging database to ensure that all the moditications + in staging are done in a different directory and will not impact the + production. 
+ """ + attachments = self.env["ir.attachment"].search( + [("fs_storage_id", "in", self.ids)] + ) + attachments._compute_fs_url() + attachments._compute_fs_url_path() diff --git a/fs_attachment/models/ir_attachment.py b/fs_attachment/models/ir_attachment.py new file mode 100644 index 0000000000..20d3aa46aa --- /dev/null +++ b/fs_attachment/models/ir_attachment.py @@ -0,0 +1,1079 @@ +# Copyright 2017-2013 Camptocamp SA +# Copyright 2023 ACSONE SA/NV +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html) + +import io +import logging +import mimetypes +import os +import re +import time +from contextlib import closing, contextmanager + +import fsspec # pylint: disable=missing-manifest-dependency +import psycopg2 +from slugify import slugify # pylint: disable=missing-manifest-dependency + +import odoo +from odoo import _, api, fields, models +from odoo.exceptions import AccessError, UserError +from odoo.osv.expression import AND, OR, normalize_domain + +from .strtobool import strtobool + +_logger = logging.getLogger(__name__) + + +REGEX_SLUGIFY = r"[^-a-z0-9_]+" + +FS_FILENAME_RE_PARSER = re.compile( + r"^(?P.+)-(?P\d+)-(?P\d+)(?P\..+)$" +) + + +def is_true(strval): + return bool(strtobool(strval or "0")) + + +def clean_fs(files): + _logger.info("cleaning old files from filestore") + for full_path in files: + if os.path.exists(full_path): + try: + os.unlink(full_path) + except OSError: + _logger.info( + "_file_delete could not unlink %s", full_path, exc_info=True + ) + except IOError: + # Harmless and needed for race conditions + _logger.info( + "_file_delete could not unlink %s", full_path, exc_info=True + ) + + +class IrAttachment(models.Model): + _inherit = "ir.attachment" + + fs_filename = fields.Char( + "File Name into the filesystem storage", + help="The name of the file in the filesystem storage." 
+ "To preserve the mimetype and the meaning of the filename" + "the filename is computed from the name and the extension", + readonly=True, + ) + + internal_url = fields.Char( + "Internal URL", + compute="_compute_internal_url", + help="The URL to access the file from the server.", + ) + + fs_url = fields.Char( + "Filesystem URL", + compute="_compute_fs_url", + help="The URL to access the file from the filesystem storage.", + store=True, + ) + fs_url_path = fields.Char( + "Filesystem URL Path", + compute="_compute_fs_url_path", + help="The path to access the file from the filesystem storage.", + ) + fs_storage_code = fields.Char( + "Filesystem Storage Code", + related="fs_storage_id.code", + store=True, + ) + fs_storage_id = fields.Many2one( + "fs.storage", + "Filesystem Storage", + compute="_compute_fs_storage_id", + help="The storage where the file is stored.", + store=True, + ondelete="restrict", + ) + + @api.depends("name") + def _compute_internal_url(self) -> None: + for rec in self: + filename, extension = os.path.splitext(rec.name) + if not extension: + extension = mimetypes.guess_extension(rec.mimetype) + rec.internal_url = f"/web/content/{rec.id}/{filename}{extension}" + + @api.depends("fs_filename") + def _compute_fs_url(self) -> None: + for rec in self: + rec.fs_url = None + if rec.fs_filename: + rec.fs_url = self.env["fs.storage"]._get_url_for_attachment(rec) + + @api.depends("fs_filename") + def _compute_fs_url_path(self) -> None: + for rec in self: + rec.fs_url_path = None + if rec.fs_filename: + rec.fs_url_path = self.env["fs.storage"]._get_url_for_attachment( + rec, exclude_base_url=True + ) + + @api.depends("fs_filename") + def _compute_fs_storage_id(self): + for rec in self: + if rec.store_fname: + code = rec.store_fname.partition("://")[0] + fs_storage = self.env["fs.storage"].get_by_code(code) + if fs_storage != rec.fs_storage_id: + rec.fs_storage_id = fs_storage + elif rec.fs_storage_id: + rec.fs_storage_id = None + + @staticmethod + def _is_storage_disabled(storage=None, log=True): + msg = _("Storages are disabled (see environment configuration).") + if storage: + msg = _("Storage '%s' is disabled (see environment configuration).") % ( + storage, + ) + is_disabled = is_true(os.environ.get("DISABLE_ATTACHMENT_STORAGE")) + if is_disabled and log: + _logger.warning(msg) + return is_disabled + + def _get_storage_force_db_config(self): + return self.env["fs.storage"].get_force_db_for_default_attachment_rules( + self._storage() + ) + + def _store_in_db_instead_of_object_storage_domain(self): + """Return a domain for attachments that must be forced to DB + + Read the docstring of ``_store_in_db_instead_of_object_storage`` for + more details. + + Used in ``force_storage_to_db_for_special_fields`` to find records + to move from the object storage to the database. + + The domain must be inline with the conditions in + ``_store_in_db_instead_of_object_storage``. + """ + domain = [] + storage_config = self._get_storage_force_db_config() + for mimetype_key, limit in storage_config.items(): + part = [("mimetype", "=like", "{}%".format(mimetype_key))] + if limit: + part = AND([part, [("file_size", "<=", limit)]]) + domain = OR([domain, part]) + return domain + + def _store_in_db_instead_of_object_storage(self, data, mimetype): + """Return whether an attachment must be stored in db + + When we are using an Object Storage. This is sometimes required + because the object storage is slower than the database/filesystem. + + Small images (128, 256) are used in Odoo in list / kanban views. 
We + want them to be fast to read. + They are generally < 50KB (default configuration) so they don't take + that much space in database, but they'll be read much faster than from + the object storage. + + The assets (application/javascript, text/css) are stored in database + as well whatever their size is: + + * a database doesn't have thousands of them + * of course better for performance + * better portability of a database: when replicating a production + instance for dev, the assets are included + + The configuration can be modified on the fs.storage record, in the + field ``force_db_for_default_attachment_rules``, as a dictionary, for + instance:: + + {"image/": 51200, "application/javascript": 0, "text/css": 0} + + Where the key is the beginning of the mimetype to configure and the + value is the limit in size below which attachments are kept in DB. + 0 means no limit. + + These limits are applied only if the storage is the default one for + attachments (see ``_storage``). + + The conditions are also applied into the domain of the method + ``_store_in_db_instead_of_object_storage_domain`` used to move records + from a filesystem storage to the database. + + """ + if self._is_storage_disabled(): + return True + storage_config = self._get_storage_force_db_config() + for mimetype_key, limit in storage_config.items(): + if mimetype.startswith(mimetype_key): + if not limit: + return True + bin_data = data + return len(bin_data) <= limit + return False + + def _get_datas_related_values(self, data, mimetype): + storage = self.env.context.get("storage_location") or self._storage() + if data and storage in self._get_storage_codes(): + if self._store_in_db_instead_of_object_storage(data, mimetype): + # compute the fields that depend on datas + bin_data = data + values = { + "file_size": len(bin_data), + "checksum": self._compute_checksum(bin_data), + "index_content": self._index(bin_data, mimetype), + "store_fname": False, + "db_datas": data, + } + return values + return super()._get_datas_related_values(data, mimetype) + + ########################################################### + # Odoo methods that we override to use the object storage # + ########################################################### + @api.model + def _storage(self): + # We check if a filesystem storage is configured for attachments + storage = self.env["fs.storage"].get_default_storage_code_for_attachments() + if not storage: + # If not, we use the default storage configured into odoo + storage = super()._storage() + return storage + + @api.model_create_multi + def create(self, vals_list): + attachments = super().create(vals_list) + attachments._enforce_meaningful_storage_filename() + return attachments + + def write(self, vals): + if not self: + return self + if ("datas" in vals or "raw" in vals) and not ( + "name" in vals or "mimetype" in vals + ): + # When we write on an attachment, if the mimetype is not provided, it + # will be computed from the name. The problem is that if you assign a + # value to the field ``datas`` or ``raw``, the name is not provided + # nor the mimetype, so the mimetype will be set to ``application/octet- + # stream``. + # We want to avoid this, so we take the mimetype of the first attachment + # and we set it on all the attachments if they all have the same mimetype. + # If they don't have the same mimetype, we raise an error. 
+ # OPW-3277070 + mimetypes = self.mapped("mimetype") + if len(set(mimetypes)) == 1: + vals["mimetype"] = mimetypes[0] + else: + raise UserError( + _( + "You can't write on multiple attachments with different " + "mimetypes at the same time." + ) + ) + return super().write(vals) + + @api.model + def _file_read(self, fname): + if self._is_file_from_a_storage(fname): + return self._storage_file_read(fname) + else: + return super()._file_read(fname) + + @api.model + def _file_write(self, bin_data, checksum): + location = self.env.context.get("storage_location") or self._storage() + if location in self._get_storage_codes(): + filename = self._storage_file_write(bin_data) + else: + filename = super()._file_write(bin_data, checksum) + return filename + + @api.model + def _file_delete(self, fname) -> None: # pylint: disable=missing-return + if self._is_file_from_a_storage(fname): + cr = self.env.cr + # using SQL to include files hidden through unlink or due to record + # rules + cr.execute( + "SELECT COUNT(*) FROM ir_attachment WHERE store_fname = %s", (fname,) + ) + count = cr.fetchone()[0] + if not count: + self._storage_file_delete(fname) + else: + super()._file_delete(fname) + + def _set_attachment_data(self, asbytes) -> None: # pylint: disable=missing-return + super()._set_attachment_data(asbytes) + self._enforce_meaningful_storage_filename() + + ############################################## + # Internal methods to use the object storage # + ############################################## + @api.model + def _storage_file_read(self, fname: str) -> bytes | None: + """Read the file from the filesystem storage""" + fs, _storage, fname = self._fs_parse_store_fname(fname) + with fs.open(fname, "rb") as fs: + return fs.read() + + @api.model + def _storage_file_write(self, bin_data: bytes) -> str: + """Write the file to the filesystem storage""" + storage = self.env.context.get("storage_location") or self._storage() + fs = self._get_fs_storage_for_code(storage) + path = self._get_fs_path(storage, bin_data) + dirname = os.path.dirname(path) + if not fs.exists(dirname): + fs.makedirs(dirname) + fname = f"{storage}://{path}" + with fs.open(path, "wb") as fs: + fs.write(bin_data) + self._fs_mark_for_gc(fname) + return fname + + @api.model + def _storage_file_delete(self, fname): + """Delete the file from the filesystem storage + + It's safe to use the fname (the store_fname) to delete the file because + even if it's the full path to the file, the gc will only delete the file + if they belong to the configured storage directory path. + """ + self._fs_mark_for_gc(fname) + + @api.model + def _get_fs_path(self, storage_code: str, bin_data: bytes) -> str: + """Compute the path to store the file in the filesystem storage""" + key = self.env.context.get("force_storage_key") + if not key: + key = self._compute_checksum(bin_data) + if self.env["fs.storage"]._must_optimize_directory_path(storage_code): + # Generate a unique directory path based on the file's hash + key = os.path.join(key[:2], key[2:4], key) + # Generate a unique directory path based on the file's hash + return key + + def _build_fs_filename(self): + """Build the filename to store in the filesystem storage + + The filename is computed from the name, the extension and a version + number. The version number is incremented each time we build a new + filename. To know if a filename has already been build, we check if + the fs_filename field is set. If it is set, we increment the version + number. The version number is taken from the computed filename. 
+ + The format of the filename is: + --. + """ + self.ensure_one() + filename, extension = os.path.splitext(self.name) + if not extension: + extension = mimetypes.guess_extension(self.mimetype) + version = 0 + if self.fs_filename: + parsed = self._parse_fs_filename(self.fs_filename) + if parsed: + version = parsed[2] + 1 + return "{}{}".format( + slugify( + "{}-{}-{}".format(filename, self.id, version), + regex_pattern=REGEX_SLUGIFY, + ), + extension, + ) + + def _enforce_meaningful_storage_filename(self) -> None: + """Enforce meaningful filename for files stored in the filesystem storage + + The filename of the file in the filesystem storage is computed from + the mimetype and the name of the attachment. This method is called + when an attachment is created to ensure that the filename of the file + in the filesystem keeps the same meaning as the name of the attachment. + + Keeping the same meaning and mimetype is important to also ease to provide + a meaningful and SEO friendly URL to the file in the filesystem storage. + """ + for attachment in self: + if not self._is_file_from_a_storage(attachment.store_fname): + continue + fs, storage, filename = self._get_fs_parts() + + if self.env["fs.storage"]._must_use_filename_obfuscation(storage): + attachment.fs_filename = filename + continue + if self._is_fs_filename_meaningful(filename): + continue + new_filename = attachment._build_fs_filename() + # we must keep the same full path as the original filename + new_filename_with_path = os.path.join( + os.path.dirname(filename), new_filename + ) + fs.rename(filename, new_filename_with_path) + attachment.fs_filename = new_filename + # we need to update the store_fname with the new filename by + # calling the write method of the field since the write method + # of ir_attachment prevent normal write on store_fname + attachment._force_write_store_fname(f"{storage}://{new_filename}") + self._fs_mark_for_gc(attachment.store_fname) + + def _force_write_store_fname(self, store_fname): + """Force the write of the store_fname field + + The base implementation of the store_fname field prevent the write + of the store_fname field. This method bypass this limitation by + calling the write method of the field directly. 
+ """ + self._fields["store_fname"].write(self, store_fname) + + @api.model + def _get_fs_storage_for_code( + self, + code: str, + ) -> fsspec.AbstractFileSystem | None: + """Return the filesystem for the given storage code""" + fs = self.env["fs.storage"].get_fs_by_code(code) + if not fs: + raise SystemError(f"No Filesystem storage for code {code}") + return fs + + @api.model + def _fs_parse_store_fname( + self, fname: str + ) -> tuple[fsspec.AbstractFileSystem, str, str]: + """Return the filesystem, the storage code and the path for the given fname + + :param fname: the fname to parse + :param base: if True, return the base filesystem + """ + partition = fname.partition("://") + storage_code = partition[0] + fs = self._get_fs_storage_for_code(storage_code) + fname = partition[2] + return fs, storage_code, fname + + @api.model + def _is_fs_filename_meaningful(self, filename: str) -> bool: + """Return True if the filename is meaningful + A filename is meaningful if it's formatted as + """ + parsed = self._parse_fs_filename(filename) + if not parsed: + return False + name, res_id, version, extension = parsed + return bool(name and res_id and version is not None and extension) + + @api.model + def _parse_fs_filename(self, filename: str) -> tuple[str, int, int, str] | None: + """Parse the filename and return the name, id, version and extension + --. + """ + if not filename: + return None + filename = os.path.basename(filename) + match = FS_FILENAME_RE_PARSER.match(filename) + if not match: + return None + name, res_id, version, extension = match.groups() + return name, int(res_id), int(version), extension + + @api.model + def _is_file_from_a_storage(self, fname): + if not fname: + return False + for storage_code in self._get_storage_codes(): + if self._is_storage_disabled(storage_code): + continue + uri = "{}://".format(storage_code) + if fname.startswith(uri): + return True + return False + + @api.model + def _fs_mark_for_gc(self, fname): + """Mark the file for deletion + + The file will be deleted by the garbage collector if it's no more + referenced by any attachment. We use a garbage collector to enforce + the transaction mechanism between Odoo and the filesystem storage. + Files are added to the garbage collector when: + - each time a file is created in the filesystem storage + - an attachment is deleted + + Whatever the result of the current transaction, the information of files + marked for deletion is stored in the database. + + When the garbage collector is called, it will check if the file is still + referenced by an attachment. If not, the file is physically deleted from + the filesystem storage. + + If the creation of the attachment fails, since the file is marked for + deletion when it's written into the filesystem storage, it will be + deleted by the garbage collector. + + If the content of the attachment is updated, we always create a new file. + This new file is marked for deletion and the old one too. If the transaction + succeeds, the old file is deleted by the garbage collector since it's no + more referenced by any attachment. If the transaction fails, the old file + is not deleted since it's still referenced by the attachment but the new + file is deleted since it's marked for deletion and not referenced. 
+ """ + self.env["fs.file.gc"]._mark_for_gc(fname) + + def _get_fs_parts( + self, + ) -> tuple[fsspec.AbstractFileSystem, str, str] | tuple[None, None, None]: + """Return the filesystem, the storage code and the path for the current attachment""" + if not self.store_fname: + return None, None, None + return self._fs_parse_store_fname(self.store_fname) + + def open( + self, + mode="rb", + block_size=None, + cache_options=None, + compression=None, + new_version=True, + **kwargs, + ) -> io.IOBase: + """ + Return a file-like object from the filesystem storage where the attachment + content is stored. + + In read mode, this method works for all attachments, even if the content + is stored in the database or into the odoo filestore or a filesystem storage. + + The resultant instance must function correctly in a context ``with`` + block. + + (parameters are ignored in the case of the database storage). + + Parameters + ---------- + path: str + Target file + mode: str like 'rb', 'w' + See builtin ``open()`` + block_size: int + Some indication of buffering - this is a value in bytes + cache_options : dict, optional + Extra arguments to pass through to the cache. + compression: string or None + If given, open file using compression codec. Can either be a compression + name (a key in ``fsspec.compression.compr``) or "infer" to guess the + compression from the filename suffix. + new_version: bool + If True, and mode is 'w', create a new version of the file. + If False, and mode is 'w', overwrite the current version of the file. + This flag is True by default to avoid data loss and ensure transaction + mechanism between Odoo and the filesystem storage. + encoding, errors, newline: passed on to TextIOWrapper for text mode + + Returns + ------- + A file-like object + + TODO if open with 'w' in mode, we could use a buffered IO detecting that + the content is modified and invalidating the attachment cache... + """ + self.ensure_one() + return AttachmentFileLikeAdapter( + self, + mode=mode, + block_size=block_size, + cache_options=cache_options, + compression=compression, + new_version=new_version, + **kwargs, + ) + + @contextmanager + def _do_in_new_env(self, new_cr=False): + """Context manager that yields a new environment + + Using a new Odoo Environment thus a new PG transaction. 
+ """ + if new_cr: + registry = odoo.modules.registry.Registry.new(self.env.cr.dbname) + with closing(registry.cursor()) as cr: + try: + yield self.env(cr=cr) + except Exception: + cr.rollback() + raise + else: + # disable pylint error because this is a valid commit, + # we are in a new env + cr.commit() # pylint: disable=invalid-commit + else: + # make a copy + yield self.env() + + def _get_storage_codes(self): + """Get the list of filesystem storage active in the system""" + return self.env["fs.storage"].sudo().get_storage_codes() + + ################################ + # useful methods for migration # + ################################ + + def _move_attachment_to_store(self): + self.ensure_one() + _logger.info("inspecting attachment %s (%d)", self.name, self.id) + fname = self.store_fname + storage = fname.partition("://")[0] + if self._is_storage_disabled(storage): + fname = False + if fname: + # migrating from filesystem filestore + # or from the old 'store_fname' without the bucket name + _logger.info("moving %s on the object storage", fname) + self.write( + { + "datas": self.datas, + # this is required otherwise the + # mimetype gets overriden with + # 'application/octet-stream' + # on assets + "mimetype": self.mimetype, + } + ) + _logger.info("moved %s on the object storage", fname) + return self._full_path(fname) + elif self.db_datas: + _logger.info("moving on the object storage from database") + self.write({"datas": self.datas}) + + @api.model + def force_storage(self): + if not self.env["res.users"].browse(self.env.uid)._is_admin(): + raise AccessError(_("Only administrators can execute this action.")) + location = self.env.context.get("storage_location") or self._storage() + if location not in self._get_storage_codes(): + return super().force_storage() + self._force_storage_to_object_storage() + + @api.model + def force_storage_to_db_for_special_fields(self, new_cr=False): + """Migrate special attachments from Object Storage back to database + + The access to a file stored on the objects storage is slower + than a local disk or database access. For attachments like + image_small that are accessed in batch for kanban views, this + is too slow. We store this type of attachment in the database. + + This method can be used when migrating a filestore where all the files, + including the special files (assets, image_small, ...) have been pushed + to the Object Storage and we want to write them back in the database. + + It is not called anywhere, but can be called by RPC or scripts. 
+ """ + storage = self._storage() + if self._is_storage_disabled(storage): + return + if storage not in self._get_storage_codes(): + return + + domain = AND( + ( + normalize_domain( + [ + ("store_fname", "=like", "{}://%".format(storage)), + # for res_field, see comment in + # _force_storage_to_object_storage + "|", + ("res_field", "=", False), + ("res_field", "!=", False), + ] + ), + normalize_domain(self._store_in_db_instead_of_object_storage_domain()), + ) + ) + + with self._do_in_new_env(new_cr=new_cr) as new_env: + model_env = new_env["ir.attachment"].with_context(prefetch_fields=False) + attachment_ids = model_env.search(domain).ids + if not attachment_ids: + return + total = len(attachment_ids) + start_time = time.time() + _logger.info( + "Moving %d attachments from %s to" " DB for fast access", total, storage + ) + current = 0 + for attachment_id in attachment_ids: + current += 1 + # if we browse attachments outside of the loop, the first + # access to 'datas' will compute all the 'datas' fields at + # once, which means reading hundreds or thousands of files at + # once, exhausting memory + attachment = model_env.browse(attachment_id) + # this write will read the datas from the Object Storage and + # write them back in the DB (the logic for location to write is + # in the 'datas' inverse computed field) + # we need to write the mimetype too, otherwise it will be + # overwritten with 'application/octet-stream' on assets. On each + # write, the mimetype is recomputed if not given. If we don't + # pass it nor the name, the mimetype will be set to the default + # value 'application/octet-stream' on assets. + attachment.write({"datas": attachment.datas}) + if current % 100 == 0 or total - current == 0: + _logger.info( + "attachment %s/%s after %.2fs", + current, + total, + time.time() - start_time, + ) + + @api.model + def _force_storage_to_object_storage(self, new_cr=False): + _logger.info("migrating files to the object storage") + storage = self.env.context.get("storage_location") or self._storage() + if self._is_storage_disabled(storage): + return + # The weird "res_field = False OR res_field != False" domain + # is required! It's because of an override of _search in ir.attachment + # which adds ('res_field', '=', False) when the domain does not + # contain 'res_field'. + # https://github.com/odoo/odoo/blob/9032617120138848c63b3cfa5d1913c5e5ad76db/ + # odoo/addons/base/ir/ir_attachment.py#L344-L347 + domain = [ + "!", + ("store_fname", "=like", "{}://%".format(storage)), + "|", + ("res_field", "=", False), + ("res_field", "!=", False), + ] + # We do a copy of the environment so we can workaround the cache issue + # below. We do not create a new cursor by default because it causes + # serialization issues due to concurrent updates on attachments during + # the installation + with self._do_in_new_env(new_cr=new_cr) as new_env: + model_env = new_env["ir.attachment"] + ids = model_env.search(domain).ids + files_to_clean = [] + for attachment_id in ids: + try: + with new_env.cr.savepoint(): + # check that no other transaction has + # locked the row, don't send a file to storage + # in that case + self.env.cr.execute( + "SELECT id " + "FROM ir_attachment " + "WHERE id = %s " + "FOR UPDATE NOWAIT", + (attachment_id,), + log_exceptions=False, + ) + + # This is a trick to avoid having the 'datas' + # function fields computed for every attachment on + # each iteration of the loop. The former issue + # being that it reads the content of the file of + # ALL the attachments on each loop. 
+ new_env.clear() + attachment = model_env.browse(attachment_id) + path = attachment._move_attachment_to_store() + if path: + files_to_clean.append(path) + except psycopg2.OperationalError: + _logger.error( + "Could not migrate attachment %s to S3", attachment_id + ) + + # delete the files from the filesystem once we know the changes + # have been committed in ir.attachment + if files_to_clean: + new_env.cr.commit() + clean_fs(files_to_clean) + + +class AttachmentFileLikeAdapter(object): + """ + This class is a wrapper class around the ir.attachment model. It is used to + open the ir.attachment as a file and to read/write data to it. + + When the content of the file is stored into the odoo filestore or in a + filesystem storage, this object allows you to read/write the content from + the file in a direct way without having to read/write the whole file into + memory. When the content of the file is stored into database, this content + is read/written from/into a buffer in memory. + + Parameters + ---------- + attachment : ir.attachment + The attachment to open as a file. + mode: str like 'rb', 'w' + See builtin ``open()`` + block_size: int + Some indication of buffering - this is a value in bytes + cache_options : dict, optional + Extra arguments to pass through to the cache. + compression: string or None + If given, open file using compression codec. Can either be a compression + name (a key in ``fsspec.compression.compr``) or "infer" to guess the + compression from the filename suffix. + new_version: bool + If True, and mode is 'w', create a new version of the file. + If False, and mode is 'w', overwrite the current version of the file. + This flag is True by default to avoid data loss and ensure transaction + mechanism between Odoo and the filesystem storage. + encoding, errors, newline: passed on to TextIOWrapper for text mode + + You can use this class to adapt an attachment object as a file in 2 ways: + * as a context manager wrapping the attachment object as a file + * or as a nomral utility class + + Examples + + >>> with AttachmentFileLikeAdapter(attachment, mode="rb") as f: + ... 
f.read() + b'Hello World' + # at the end of the context manager, the file is closed + >>> f = AttachmentFileLikeAdapter(attachment, mode="rb") + >>> f.read() + b'Hello World' + # you have to close the file manually + >>> f.close() + + """ + + def __init__( + self, + attachment: IrAttachment, + mode: str = "rb", + block_size: int | None = None, + cache_options: dict | None = None, + compression: str | None = None, + new_version: bool = False, + **kwargs, + ): + self._attachment = attachment + self._mode = mode + self._block_size = block_size + self._cache_options = cache_options + self._compression = compression + self._new_version = new_version + self._kwargs = kwargs + + # state attributes + self._file: io.IOBase | None = None + self._filesystem: fsspec.AbstractFileSystem | None = None + self._new_store_fname: str | None = None + + @property + def attachment(self) -> IrAttachment: + """The attachment object the file is related to""" + return self._attachment + + @property + def mode(self) -> str: + """The mode used to open the file""" + return self._mode + + @property + def block_size(self) -> int | None: + """The block size used to open the file""" + return self._block_size + + @property + def cache_options(self) -> dict | None: + """The cache options used to open the file""" + return self._cache_options + + @property + def compression(self) -> str | None: + """The compression used to open the file""" + return self._compression + + @property + def new_version(self) -> bool: + """Is the file open for a new version""" + return self._new_version + + @property + def kwargs(self) -> dict: + """The kwargs passed when opening the file on the""" + return self._kwargs + + @property + def _is_open_for_modify(self) -> bool: + """Is the file open for modification + A file is open for modification if it is open for writing or appending + """ + return "w" in self.mode or "a" in self.mode + + @property + def _is_open_for_read(self) -> bool: + """Is the file open for reading""" + return "r" in self.mode + + @property + def _is_stored_in_db(self) -> bool: + """Is the file stored in database""" + return self.attachment._storage() == "db" + + def __enter__(self) -> io.IOBase: + """Called when entering the context manager + + Create the file object and return it. + """ + # we call the attachment instance to get the file object + self._file_open() + return self._file + + def _file_open(self) -> io.IOBase: + """Open the attachment content as a file-like object + + This method will initialize the following attributes: + + * _file: the file-like object. + * _filesystem: filesystem object. + * _new_store_fname: the new store_fname if the file is + opened for a new version. 
+ """ + new_store_fname = None + if ( + self._is_open_for_read + or (self._is_open_for_modify and not self.new_version) + or self._is_stored_in_db + ): + if self.attachment._is_file_from_a_storage(self.attachment.store_fname): + fs, _storage, fname = self.attachment._get_fs_parts() + filepath = fname + filesystem = fs + elif self.attachment.store_fname: + filepath = self.attachment._full_path(self.attachment.store_fname) + filesystem = fsspec.filesystem("file") + else: + filepath = f"{self.attachment.id}" + filesystem = fsspec.filesystem("memory") + if "a" in self.mode or self._is_open_for_read: + filesystem.pipe_file(filepath, self.attachment.db_datas) + the_file = filesystem.open( + filepath, + mode=self.mode, + block_size=self.block_size, + cache_options=self.cache_options, + compression=self.compression, + **self.kwargs, + ) + else: + # mode='w' and new_version=True and storage != 'db' + # We must create a new file with a new name. If we are in an + # append mode, we must copy the content of the old file (or create + # the new one by copy of the old one). + # to not break the storage plugin mechanism, we'll use the + # _file_write method to create the new empty file with a random + # content and checksum to avoid collision. + content = self._gen_random_content() + checksum = self.attachment._compute_checksum(content) + new_store_fname = self.attachment._file_write(content, checksum) + if self.attachment._is_file_from_a_storage(new_store_fname): + ( + filesystem, + _storage, + new_filepath, + ) = self.attachment._fs_parse_store_fname(new_store_fname) + _fs, _storage, old_filepath = self.attachment._get_fs_parts() + else: + new_filepath = self.attachment._full_path(new_store_fname) + old_filepath = self.attachment._full_path(self.attachment.store_fname) + filesystem = fsspec.filesystem("file") + if "a" in self.mode: + filesystem.cp_file(old_filepath, new_filepath) + the_file = filesystem.open( + new_filepath, + mode=self.mode, + block_size=self.block_size, + cache_options=self.cache_options, + compression=self.compression, + **self.kwargs, + ) + self._filesystem = filesystem + self._new_store_fname = new_store_fname + self._file = the_file + + def _gen_random_content(self, size=256): + """Generate a random content of size bytes""" + return os.urandom(size) + + def _file_close(self): + """Close the file-like object opened by _file_open""" + if not self._file: + return + if not self._file.closed: + self._file.flush() + self._file.close() + if self._is_open_for_modify: + attachment_data = self._get_attachment_data() + if ( + not (self.new_version and self._new_store_fname) + and self._is_stored_in_db + ): + attachment_data["raw"] = self._file.getvalue() + self.attachment.write(attachment_data) + if self.new_version and self._new_store_fname: + self.attachment._force_write_store_fname(self._new_store_fname) + self.attachment._enforce_meaningful_storage_filename() + self._ensure_cache_consistency() + + def _get_attachment_data(self) -> dict: + ret = {} + if self._file: + file_path = self._file.path + if hasattr(self._filesystem, "path"): + file_path = file_path.replace(self._filesystem.path, "") + ret["checksum"] = self._filesystem.checksum(file_path) + ret["file_size"] = self._filesystem.size(file_path) + # TODO index_content is too expensive to compute here or should be configurable + # data = self._file.read() + # ret["index_content"] = self.attachment._index_content(data, + # self.attachment.mimetype, ret["checksum"]) + ret["index_content"] = b"" + + return ret + + def 
_ensure_cache_consistency(self): + """Ensure the cache consistency once the file is closed""" + if self._is_open_for_modify and not self._is_stored_in_db: + self.attachment.invalidate_recordset(fnames=["raw", "datas", "db_datas"]) + if ( + self.attachment.res_model + and self.attachment.res_id + and self.attachment.res_field + ): + self.attachment.env[self.attachment.res_model].browse( + self.attachment.res_id + ).invalidate_recordset(fnames=[self.attachment.res_field]) + + def __exit__(self, *args): + """Called when exiting the context manager. + + Close the file if it is not already closed. + """ + self._file_close() + + def __getattr__(self, attr): + """ + Forward all other attributes to the underlying file object. + + This method is required to make the object behave like a file object + when the AttachmentFileLikeAdapter is used outside a context manager. + + .. code-block:: python + + f = AttachmentFileLikeAdapter(attachment) + f.read() + + """ + if not self._file: + self.__enter__() + return getattr(self._file, attr) diff --git a/fs_attachment/models/ir_binary.py b/fs_attachment/models/ir_binary.py new file mode 100644 index 0000000000..d06d5f1db0 --- /dev/null +++ b/fs_attachment/models/ir_binary.py @@ -0,0 +1,41 @@ +# Copyright 2023 ACSONE SA/NV +# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl). +import logging + +from odoo import models + +from ..fs_stream import FsStream + +_logger = logging.getLogger(__name__) + + +class IrBinary(models.AbstractModel): + + _inherit = "ir.binary" + + def _record_to_stream(self, record, field_name): + # Extend base implementation to support attachment stored into a + # filesystem storage + fs_attachment = None + if record._name == "ir.attachment" and record.fs_filename: + fs_attachment = record + record.check_field_access_rights("read", [field_name]) + field_def = record._fields[field_name] + if field_def.attachment and not field_def.compute and not field_def.related: + field_attachment = ( + self.env["ir.attachment"] + .sudo() + .search( + domain=[ + ("res_model", "=", record._name), + ("res_id", "=", record.id), + ("res_field", "=", field_name), + ], + limit=1, + ) + ) + if field_attachment.fs_filename: + fs_attachment = field_attachment + if fs_attachment: + return FsStream.from_fs_attachment(fs_attachment) + return super()._record_to_stream(record, field_name) diff --git a/fs_attachment/models/strtobool.py b/fs_attachment/models/strtobool.py new file mode 100644 index 0000000000..b1a849f283 --- /dev/null +++ b/fs_attachment/models/strtobool.py @@ -0,0 +1,21 @@ +_MAP = { + "y": True, + "yes": True, + "t": True, + "true": True, + "on": True, + "1": True, + "n": False, + "no": False, + "f": False, + "false": False, + "off": False, + "0": False, +} + + +def strtobool(value): + try: + return _MAP[str(value).lower()] + except KeyError as e: + raise ValueError('"{}" is not a valid bool value'.format(value)) from e diff --git a/fs_attachment/readme/CONTRIBUTORS.rst b/fs_attachment/readme/CONTRIBUTORS.rst new file mode 100644 index 0000000000..b28dec9346 --- /dev/null +++ b/fs_attachment/readme/CONTRIBUTORS.rst @@ -0,0 +1,11 @@ +Thierry Ducrest +Guewen Baconnier +Julien Coux +Akim Juillerat +Thomas Nowicki +Vincent Renaville +Denis Leemann +Patrick Tombez +Don Kendall +Stephane Mangin +Laurent Mignon diff --git a/fs_attachment/readme/DESCRIPTION.rst b/fs_attachment/readme/DESCRIPTION.rst new file mode 100644 index 0000000000..f0ca6ff482 --- /dev/null +++ b/fs_attachment/readme/DESCRIPTION.rst @@ -0,0 +1,45 @@ +In some cases, you need to 
store attachments in a system other than Odoo's
+filestore. For example, when your deployment is based on a multi-server
+architecture to ensure redundancy and scalability, your attachments must
+be stored in a way that makes them accessible from all the servers. To
+achieve this, you can use a shared storage system like NFS or a cloud
+storage such as an S3-compliant object store.
+
+This addon extends the storage mechanism of Odoo's attachments to allow
+you to store them in any filesystem storage supported by the Python
+library `fsspec `_ and made
+available via the `fs_storage` addon.
+
+In contrast to Odoo, when a file is stored in an external storage, this
+addon ensures that the filename keeps its meaning (in Odoo the filename
+in the filestore is the checksum of the file content). Concretely the filename
+is based on the pattern
+'<name-without-extension>-<attachment-id>-<version>.<extension>'.
+For example, the first version of an attachment named ``test.txt`` with id 1
+is stored as ``test-1-0.txt``.
+
+This addon also adds 2 new fields on the attachment, used
+to retrieve the file content from a URL:
+
+* ``Internal URL``: URL to retrieve the file content from Odoo's
+  filestore.
+* ``Filesystem URL``: URL to retrieve the file content from the external
+  storage.
+
+.. note::
+
+   The internal URL is always available, but the filesystem URL is only
+   available when the attachment is stored in an external storage.
+   Particular attention has been paid to limit as much as possible the consumption
+   of resources necessary to serve via Odoo the content stored in an external
+   filesystem. The implementation is based on an end-to-end streaming of content
+   between the external filesystem and the Odoo client application by default.
+   Nevertheless, if your content is available via a URL on the external filesystem,
+   you can configure the storage to use the x-sendfile mechanism to serve the
+   content if it's activated on your Odoo instance. In this case, the content
+   served by Odoo at the internal URL will be proxied to the filesystem URL
+   by nginx.
+
+Last but not least, the addon adds a new method `open` on the attachment. This
+method allows you to open the attachment as a file. For attachments stored in
+the filestore or in an external filesystem, it allows you to read from
+and write to the file directly, which minimizes memory consumption since the data
+is not kept in memory before being written to the database.
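+
+As a minimal sketch (assuming ``attachment`` is an existing ``ir.attachment``
+record), reading the content back through this method looks like:
+
+.. code-block:: python
+
+    with attachment.open("rb") as fileobj:
+        data = fileobj.read()
+
+The usage documentation below covers the write modes and the ``new_version``
+parameter in more detail.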
diff --git a/fs_attachment/readme/USAGE.rst b/fs_attachment/readme/USAGE.rst
new file mode 100644
index 0000000000..9cd3b1b002
--- /dev/null
+++ b/fs_attachment/readme/USAGE.rst
@@ -0,0 +1,209 @@
+Configuration
+~~~~~~~~~~~~~
+
+The configuration is done through the creation of a filesystem storage record
+in Odoo. To create a new storage, go to the menu
+``Settings > Technical > FS Storage`` and click on ``Create``.
+
+In addition to the common fields available to configure a storage, specific
+fields are available under the section 'Attachment' to configure the way
+attachments will be stored in the filesystem (a configuration sketch is given
+after the list below).
+
+* ``Optimizes Directory Path``: This option is useful if you need to prevent
+  having too many files in a single directory. It will create a directory
+  structure based on the attachment's checksum (with 2 levels of depth).
+  For example, if the checksum is ``123456789``, the file will be stored in the
+  directory ``/path/to/storage/12/34/my_file-1-0.txt``.
+* ``Autovacuum GC``: This is used to automatically remove files from the filesystem
+  when they are no longer referenced in Odoo. Some storage backends (like S3) may
+  charge you for the storage of files, so it's important to remove them when
+  they're no longer needed. In some cases, this option is not desirable, for
+  example if you're using a storage backend to store images shared with other
+  systems (like your website) and you don't want to remove the files from the
+  storage while they're still referenced in the other systems.
+  This mechanism is based on a ``fs.file.gc`` model used to collect the files
+  to remove. This model is automatically populated by the ``ir.attachment``
+  model when a file is removed from the database. If you disable this option,
+  you'll have to manually take care of the records in the ``fs.file.gc`` model for
+  your filesystem storage.
+* ``Use As Default For Attachment``: This option allows you to declare the storage
+  as the default one for attachments. If you have multiple filesystem storages
+  configured, you can choose which one will be used by default for attachments.
+  Once activated, attachments created without specifying a storage will be
+  stored in this default storage.
+* ``Force DB For Default Attachment Rules``: This option is useful if you want to
+  force the storage of some attachments in the database, even if you have a
+  default filesystem storage configured. This is especially useful when you're
+  using a storage backend like S3, where the latency of the network can be
+  high. This option is a JSON field that allows you to define the mimetypes and
+  the size limit below which the attachments will be stored in the database.
+
+  Small images (128, 256) are used in Odoo in list / kanban views. We
+  want them to be fast to read.
+  They are generally < 50KB (default configuration) so they don't take
+  that much space in the database, but they'll be read much faster than from
+  the object storage.
+
+  The assets (application/javascript, text/css) are stored in the database
+  as well, whatever their size:
+
+  * a database doesn't have thousands of them
+  * it is of course better for performance
+  * better portability of a database: when replicating a production
+    instance for dev, the assets are included
+
+  The default configuration is:
+
+  {"image/": 51200, "application/javascript": 0, "text/css": 0}
+
+  Where the key is the beginning of the mimetype to configure and the
+  value is the size limit below which attachments are kept in the DB.
+  0 means no limit.
+
+  The default configuration means:
+
+  * image mimetypes (image/png, image/jpeg, ...) below 50KB are
+    stored in the database
+  * application/javascript files are stored in the database whatever their size
+  * text/css files are stored in the database whatever their size
+
+  This option is only available on the filesystem storage that is used
+  as default for attachments.
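+
+For illustration, the same configuration can also be done from code. A minimal
+sketch (the values are only examples; the field names follow the technical
+option keys listed in the Server Environment section below):
+
+.. code-block:: python
+
+    storage = self.env["fs.storage"].create(
+        {
+            "name": "Attachments storage",
+            "code": "fs_att",
+            "protocol": "file",
+            "directory_path": "/var/lib/odoo/fs_storage",
+            "optimizes_directory_path": True,
+            "autovacuum_gc": True,
+            "use_as_default_for_attachments": True,
+            "force_db_for_default_attachment_rules": '{"image/": 51200}',
+        }
+    )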
+
+Another key feature of this module is the ability to get access to the attachments
+from URLs.
+
+* ``Base URL``: This is the base URL used to access the attachments from the
+  filesystem storage itself. If your storage doesn't provide a way to access
+  the files from a URL, you can leave this field empty.
+* ``Is Directory Path In URL``: Normally the directory path configured on the storage
+  is not included in the URL. If you want to include it, you can activate this option.
+* ``Use X-Sendfile To Serve Internal Url``: If checked and Odoo is behind a proxy
+  that supports x-sendfile, the content served by the attachment's internal URL
+  will be served by the proxy using the filesystem URL path if defined (this field
+  is available on the attachment if the storage is configured with a base URL).
+  If not, the file will be served by Odoo, which will stream the content read from
+  the filesystem storage. This option is useful to avoid serving files from Odoo
+  and therefore to reduce the load on the Odoo process.
+
+  To be fully functional, this option requires the proxy to support x-sendfile
+  (apache) or x-accel-redirect (nginx). You must also configure your proxy by
+  adding, for each storage, a rule to redirect the URL rooted at the 'storage code'
+  to the server serving the files. For example, if you have a storage with the
+  code 'my_storage' and a server serving the files at the URL 'http://myserver.com',
+  you must add the following rule in your proxy configuration:
+
+  .. code-block:: nginx
+
+      location /my_storage/ {
+          internal;
+          proxy_pass http://myserver.com;
+      }
+
+  With this configuration, a call to '/web/content/<att.id>/<att.name><att.extension>'
+  for a file stored in the 'my_storage' storage will generate a response by Odoo
+  with the URI
+  ``/my_storage/<path_in_storage>/<att.name>-<att.id>-<version><att.extension>``
+  in the headers ``X-Accel-Redirect`` and ``X-Sendfile``, and the proxy will redirect to
+  ``http://myserver.com/<path_in_storage>/<att.name>-<att.id>-<version><att.extension>``.
+
+  See https://www.nginx.com/resources/wiki/start/topics/examples/x-accel/ for more
+  information.
+
+* ``Use Filename Obfuscation``: If checked, the filename used to store the content
+  in the filesystem storage will be obfuscated. This is useful to avoid
+  exposing the real filename of the attachments outside of the Odoo database.
+  The filename will be obfuscated by using the checksum of the content. Avoid this
+  option when the content of your filestore is shared with other systems
+  (like your website) and you want to keep a meaningful filename to ensure
+  SEO. This option is disabled by default.
+
+
+Server Environment
+~~~~~~~~~~~~~~~~~~
+
+When you configure a storage through the use of a server environment file, you can
+provide values for the following keys:
+
+* ``optimizes_directory_path``
+* ``autovacuum_gc``
+* ``base_url``
+* ``is_directory_path_in_url``
+* ``use_x_sendfile_to_serve_internal_url``
+* ``use_as_default_for_attachments``
+* ``force_db_for_default_attachment_rules``
+* ``use_filename_obfuscation``
+
+For example, the configuration of a storage with code `fsprod` used to store
+the attachments by default could be:
+
+.. code-block:: ini
+
+    [fs_storage.fsprod]
+    protocol=s3
+    options={"endpoint_url": "https://my_s3_server/", "key": "KEY", "secret": "SECRET"}
+    directory_path=my_bucket
+    use_as_default_for_attachments=True
+    use_filename_obfuscation=True
+
+Advanced usage: Using attachment as a file
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The `open` method on the attachment can be used to open and manipulate the attachment
+as a file object. The object returned by the call to the method implements
+methods from ``io.IOBase``. The method can be called like any other Python method.
+In such a case, it's your responsibility to close the file at the end of your
+process.
+
+.. code-block:: python
+
+    attachment = self.env["ir.attachment"].create({"name": "test.txt"})
+    the_file = attachment.open("wb")
+    try:
+        the_file.write(b"content")
+    finally:
+        the_file.close()
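+
+Reading follows the same pattern; a minimal sketch, reusing the attachment
+created above:
+
+.. code-block:: python
+
+    the_file = attachment.open("rb")
+    try:
+        content = the_file.read()
+    finally:
+        the_file.close()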
+
+The result of the call to `open` also works in a context ``with`` block. In such
+a case, when the code exits the block, the file is automatically closed.
+
+.. code-block:: python
+
+    attachment = self.env["ir.attachment"].create({"name": "test.txt"})
+    with attachment.open("wb") as the_file:
+        the_file.write(b"content")
+
+It's always safer to prefer the second approach.
+
+When your attachment is stored in the Odoo filestore or in an external
+filesystem storage, each time you call the open method a new file is created.
+This behaviour ensures that if the transaction is rolled back the original content
+is preserved. Nevertheless, you could have use cases where you would like to write
+to the existing file directly. For example, you could create an empty attachment
+to store a CSV report and then use the `open` method to write your content directly
+into the new file. To support this kind of use case, the parameter `new_version`
+can be passed as `False` to avoid the creation of a new file.
+
+.. code-block:: python
+
+    attachment = self.env["ir.attachment"].create({"name": "test.txt"})
+    with attachment.open("w", new_version=False) as f:
+        writer = csv.writer(f, delimiter=";")
+        ....
+
+
+Tips & Tricks
+~~~~~~~~~~~~~
+
+* When working in multi-staging environments, the management of the attachments
+  can be tricky. For example, if you have a production instance and a staging
+  instance based on a backup of the production environment, you may want to have
+  the attachments shared between the two instances BUT you don't want to have
+  one instance removing or modifying the attachments of the other instance.
+
+  To do so, you can add a new storage on your staging instances and declare it
+  as the default storage to use for attachments. This way, all the new attachments
+  will be stored in this new storage but the attachments created on the production
+  instance will still be read from the production storage. Be careful to adapt the
+  configuration of the storage pointing to the production environment to make it
+  read-only (the use of server environment files is a good way to do so).
diff --git a/fs_attachment/security/fs_file_gc.xml b/fs_attachment/security/fs_file_gc.xml
new file mode 100644
index 0000000000..077c38c430
--- /dev/null
+++ b/fs_attachment/security/fs_file_gc.xml
@@ -0,0 +1,16 @@
+    fs.file.gc access name
diff --git a/fs_attachment/static/description/index.html b/fs_attachment/static/description/index.html
new file mode 100644
index 0000000000..1d4569bb60
--- /dev/null
+++ b/fs_attachment/static/description/index.html
@@ -0,0 +1,680 @@
+Base Attachment Object Store
+ + diff --git a/fs_attachment/tests/__init__.py b/fs_attachment/tests/__init__.py new file mode 100644 index 0000000000..7f56d04124 --- /dev/null +++ b/fs_attachment/tests/__init__.py @@ -0,0 +1,3 @@ +from . import test_fs_attachment +from . import test_fs_attachment_file_like_adapter +from . import test_fs_attachment_internal_url diff --git a/fs_attachment/tests/common.py b/fs_attachment/tests/common.py new file mode 100644 index 0000000000..95ea76d006 --- /dev/null +++ b/fs_attachment/tests/common.py @@ -0,0 +1,53 @@ +# Copyright 2023 ACSONE SA/NV (http://acsone.eu). +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). +import os +import shutil +import tempfile + +from odoo.tests.common import TransactionCase + + +class TestFSAttachmentCommon(TransactionCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.env = cls.env(context=dict(cls.env.context, tracking_disable=True)) + temp_dir = tempfile.mkdtemp() + cls.temp_backend = cls.env["fs.storage"].create( + { + "name": "Temp FS Storage", + "protocol": "file", + "code": "tmp_dir", + "directory_path": temp_dir, + } + ) + cls.temp_dir = temp_dir + cls.gc_file_model = cls.env["fs.file.gc"] + cls.ir_attachment_model = cls.env["ir.attachment"] + + @cls.addClassCleanup + def cleanup_tempdir(): + shutil.rmtree(temp_dir) + + def setUp(self): + super().setUp() + # enforce temp_backend field since it seems that they are reset on + # savepoint rollback when managed by server_environment -> TO Be investigated + self.temp_backend.write( + { + "protocol": "file", + "code": "tmp_dir", + "directory_path": self.temp_dir, + } + ) + + def tearDown(self) -> None: + super().tearDown() + # empty the temp dir + for f in os.listdir(self.temp_dir): + os.remove(os.path.join(self.temp_dir, f)) + + +class MyException(Exception): + """Exception to be raised into tests ensure that we trap only this + exception and not other exceptions raised by the test""" diff --git a/fs_attachment/tests/test_fs_attachment.py b/fs_attachment/tests/test_fs_attachment.py new file mode 100644 index 0000000000..ce304c3d8f --- /dev/null +++ b/fs_attachment/tests/test_fs_attachment.py @@ -0,0 +1,342 @@ +# Copyright 2023 ACSONE SA/NV (http://acsone.eu). +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). 
+import os +from unittest import mock + +from odoo.tools import mute_logger + +from .common import MyException, TestFSAttachmentCommon + + +class TestFSAttachment(TestFSAttachmentCommon): + def test_create_attachment_explicit_location(self): + content = b"This is a test attachment" + attachment = ( + self.env["ir.attachment"] + .with_context( + storage_location=self.temp_backend.code, + force_storage_key="test.txt", + ) + .create({"name": "test.txt", "raw": content}) + ) + self.assertEqual(os.listdir(self.temp_dir), [f"test-{attachment.id}-0.txt"]) + self.assertEqual(attachment.raw, content) + self.assertFalse(attachment.db_datas) + self.assertEqual(attachment.mimetype, "text/plain") + with attachment.open("rb") as f: + self.assertEqual(f.read(), content) + + with attachment.open("wb") as f: + f.write(b"new") + self.assertEqual(attachment.raw, b"new") + + def test_open_attachment_in_db(self): + self.env["ir.config_parameter"].sudo().set_param("ir_attachment.location", "db") + content = b"This is a test attachment in db" + attachment = self.ir_attachment_model.create( + {"name": "test.txt", "raw": content} + ) + self.assertFalse(attachment.store_fname) + self.assertTrue(attachment.db_datas) + self.assertEqual(attachment.mimetype, "text/plain") + with attachment.open("rb") as f: + self.assertEqual(f.read(), content) + with attachment.open("wb") as f: + f.write(b"new") + self.assertEqual(attachment.raw, b"new") + + def test_attachment_open_in_filestore(self): + self.env["ir.config_parameter"].sudo().set_param( + "ir_attachment.location", "file" + ) + content = b"This is a test attachment in filestore" + attachment = self.ir_attachment_model.create( + {"name": "test.txt", "raw": content} + ) + self.assertTrue(attachment.store_fname) + self.assertFalse(attachment.db_datas) + self.assertEqual(attachment.raw, content) + with attachment.open("rb") as f: + self.assertEqual(f.read(), content) + with attachment.open("wb") as f: + f.write(b"new") + self.assertEqual(attachment.raw, b"new") + + def test_default_attachment_store_in_fs(self): + self.temp_backend.use_as_default_for_attachments = True + content = b"This is a test attachment in filestore tmp_dir" + attachment = self.ir_attachment_model.create( + {"name": "test.txt", "raw": content} + ) + self.assertTrue(attachment.store_fname) + self.assertFalse(attachment.db_datas) + self.assertEqual(attachment.raw, content) + self.assertEqual(attachment.mimetype, "text/plain") + self.env.flush_all() + + initial_filename = f"test-{attachment.id}-0.txt" + + self.assertEqual(os.listdir(self.temp_dir), [initial_filename]) + + with attachment.open("rb") as f: + self.assertEqual(f.read(), content) + + with open(os.path.join(self.temp_dir, initial_filename), "rb") as f: + self.assertEqual(f.read(), content) + + # update the attachment + attachment.raw = b"new" + with attachment.open("rb") as f: + self.assertEqual(f.read(), b"new") + # a new file version is created + new_filename = f"test-{attachment.id}-1.txt" + with open(os.path.join(self.temp_dir, new_filename), "rb") as f: + self.assertEqual(f.read(), b"new") + self.assertEqual(attachment.raw, b"new") + self.assertEqual(attachment.store_fname, f"tmp_dir://{new_filename}") + self.assertEqual(attachment.mimetype, "text/plain") + + # the original file is to to be deleted by the GC + self.assertEqual( + set(os.listdir(self.temp_dir)), {initial_filename, new_filename} + ) + + # run the GC + self.env.flush_all() + self.gc_file_model._gc_files_unsafe() + self.assertEqual(os.listdir(self.temp_dir), [new_filename]) + + 
attachment.unlink() + # concrete file deletion is done by the GC + self.env.flush_all() + self.assertEqual(os.listdir(self.temp_dir), [new_filename]) + # run the GC + self.gc_file_model._gc_files_unsafe() + self.assertEqual(os.listdir(self.temp_dir), []) + + def test_fs_update_transactionnal(self): + """In this test we check that if a rollback is done on an update + The original content is preserved + """ + self.temp_backend.use_as_default_for_attachments = True + content = b"Transactional update" + attachment = self.ir_attachment_model.create( + {"name": "test.txt", "raw": content} + ) + self.env.flush_all() + self.assertEqual(attachment.raw, content) + + initial_filename = f"test-{attachment.id}-0.txt" + + self.assertEqual(attachment.store_fname, f"tmp_dir://{initial_filename}") + self.assertEqual(attachment.fs_filename, initial_filename) + self.assertEqual( + os.listdir(self.temp_dir), [os.path.basename(initial_filename)] + ) + + orignal_store_fname = attachment.store_fname + try: + with self.env.cr.savepoint(): + attachment.raw = b"updated" + new_filename = f"test-{attachment.id}-1.txt" + new_store_fname = f"tmp_dir://{new_filename}" + self.assertEqual(attachment.store_fname, new_store_fname) + self.assertEqual(attachment.fs_filename, new_filename) + # at this stage the original file and the new file are present + # in the list of files to GC + gc_files = self.gc_file_model.search([]).mapped("store_fname") + self.assertIn(orignal_store_fname, gc_files) + self.assertIn(orignal_store_fname, gc_files) + raise MyException("dummy exception") + except MyException: + ... + self.assertEqual(attachment.store_fname, f"tmp_dir://{initial_filename}") + self.assertEqual(attachment.fs_filename, initial_filename) + self.assertEqual(attachment.raw, content) + self.assertEqual(attachment.mimetype, "text/plain") + self.assertEqual( + set(os.listdir(self.temp_dir)), + {os.path.basename(initial_filename), os.path.basename(new_filename)}, + ) + # in test mode, gc collector is not run into a separate transaction + # therefore it has been reset. We manually add our two store_fname + # to the list of files to GC + self.gc_file_model._mark_for_gc(orignal_store_fname) + self.gc_file_model._mark_for_gc(new_store_fname) + # run gc + self.gc_file_model._gc_files_unsafe() + self.assertEqual( + os.listdir(self.temp_dir), [os.path.basename(initial_filename)] + ) + + def test_fs_create_transactional(self): + """In this test we check that if a rollback is done on a create + The file is removed + """ + self.temp_backend.use_as_default_for_attachments = True + content = b"Transactional create" + try: + + with self.env.cr.savepoint(): + attachment = self.ir_attachment_model.create( + {"name": "test.txt", "raw": content} + ) + self.env.flush_all() + self.assertEqual(attachment.raw, content) + initial_filename = f"test-{attachment.id}-0.txt" + self.assertEqual( + attachment.store_fname, f"tmp_dir://{initial_filename}" + ) + self.assertEqual(attachment.fs_filename, initial_filename) + self.assertEqual( + os.listdir(self.temp_dir), [os.path.basename(initial_filename)] + ) + new_store_fname = attachment.store_fname + # at this stage the new file is into the list of files to GC + gc_files = self.gc_file_model.search([]).mapped("store_fname") + self.assertIn(new_store_fname, gc_files) + raise MyException("dummy exception") + except MyException: + ... + self.env.flush_all() + # in test mode, gc collector is not run into a separate transaction + # therefore it has been reset. 
We manually add our new file to the + # list of files to GC + self.gc_file_model._mark_for_gc(new_store_fname) + # run gc + self.gc_file_model._gc_files_unsafe() + self.assertEqual(os.listdir(self.temp_dir), []) + + def test_fs_no_delete_if_not_in_current_directory_path(self): + """In this test we check that it's not possible to removes files + outside the current directory path even if they were created by the + current filesystem storage. + """ + # normal delete + self.temp_backend.use_as_default_for_attachments = True + content = b"Transactional create" + attachment = self.ir_attachment_model.create( + {"name": "test.txt", "raw": content} + ) + self.env.flush_all() + initial_filename = f"test-{attachment.id}-0.txt" + self.assertEqual( + os.listdir(self.temp_dir), [os.path.basename(initial_filename)] + ) + attachment.unlink() + self.gc_file_model._gc_files_unsafe() + self.assertEqual(os.listdir(self.temp_dir), []) + # delete outside the current directory path + attachment = self.ir_attachment_model.create( + {"name": "test.txt", "raw": content} + ) + self.env.flush_all() + initial_filename = f"test-{attachment.id}-0.txt" + self.assertEqual( + os.listdir(self.temp_dir), [os.path.basename(initial_filename)] + ) + self.temp_backend.directory_path = "/dummy" + attachment.unlink() + self.gc_file_model._gc_files_unsafe() + # unlink is not physically done since the file is outside the current + self.assertEqual( + os.listdir(self.temp_dir), [os.path.basename(initial_filename)] + ) + + def test_no_gc_if_disabled_on_storage(self): + store_fname = "tmp_dir://dummy-0-0.txt" + self.gc_file_model._mark_for_gc(store_fname) + self.temp_backend.autovacuum_gc = False + self.gc_file_model._gc_files_unsafe() + self.assertIn(store_fname, self.gc_file_model.search([]).mapped("store_fname")) + self.temp_backend.autovacuum_gc = False + self.gc_file_model._gc_files_unsafe() + self.assertIn(store_fname, self.gc_file_model.search([]).mapped("store_fname")) + self.temp_backend.autovacuum_gc = True + self.gc_file_model._gc_files_unsafe() + self.assertNotIn( + store_fname, self.gc_file_model.search([]).mapped("store_fname") + ) + + def test_attachment_fs_url(self): + self.temp_backend.base_url = "https://acsone.eu/media" + self.temp_backend.use_as_default_for_attachments = True + content = b"Transactional update" + attachment = self.ir_attachment_model.create( + {"name": "test.txt", "raw": content} + ) + self.env.flush_all() + attachment_path = f"/test-{attachment.id}-0.txt" + self.assertEqual(attachment.fs_url, f"https://acsone.eu/media{attachment_path}") + self.assertEqual(attachment.fs_url_path, attachment_path) + + self.temp_backend.is_directory_path_in_url = True + self.temp_backend.recompute_urls() + attachment_path = f"{self.temp_dir}/test-{attachment.id}-0.txt" + self.assertEqual(attachment.fs_url, f"https://acsone.eu/media{attachment_path}") + self.assertEqual(attachment.fs_url_path, attachment_path) + + def test_force_attachment_in_db_rules(self): + self.temp_backend.use_as_default_for_attachments = True + # force storage in db for text/plain + self.temp_backend.force_db_for_default_attachment_rules = '{"text/plain": 0}' + attachment = self.ir_attachment_model.create( + {"name": "test.txt", "raw": b"content"} + ) + self.env.flush_all() + self.assertFalse(attachment.store_fname) + self.assertEqual(attachment.db_datas, b"content") + self.assertEqual(attachment.mimetype, "text/plain") + + def test_force_storage_to_db(self): + self.temp_backend.use_as_default_for_attachments = True + attachment = 
self.ir_attachment_model.create( + {"name": "test.txt", "raw": b"content"} + ) + self.env.flush_all() + self.assertTrue(attachment.store_fname) + self.assertFalse(attachment.db_datas) + store_fname = attachment.store_fname + # we change the rules to force the storage in db for text/plain + self.temp_backend.force_db_for_default_attachment_rules = '{"text/plain": 0}' + attachment.force_storage_to_db_for_special_fields() + self.assertFalse(attachment.store_fname) + self.assertEqual(attachment.db_datas, b"content") + # we check that the file is marked for GC + gc_files = self.gc_file_model.search([]).mapped("store_fname") + self.assertIn(store_fname, gc_files) + + @mute_logger("odoo.addons.fs_attachment.models.ir_attachment") + def test_force_storage_to_fs(self): + attachment = self.ir_attachment_model.create( + {"name": "test.txt", "raw": b"content"} + ) + self.env.flush_all() + fs_path = self.ir_attachment_model._filestore() + "/" + attachment.store_fname + self.assertTrue(os.path.exists(fs_path)) + self.assertEqual(os.listdir(self.temp_dir), []) + # we decide to force the storage in the filestore + self.temp_backend.use_as_default_for_attachments = True + with mock.patch.object(self.env.cr, "commit"), mock.patch( + "odoo.addons.fs_attachment.models.ir_attachment.clean_fs" + ) as clean_fs: + self.ir_attachment_model.force_storage() + clean_fs.assert_called_once() + # files into the filestore must be moved to our filesystem storage + filename = f"test-{attachment.id}-0.txt" + self.assertEqual(attachment.store_fname, f"tmp_dir://{filename}") + self.assertIn(filename, os.listdir(self.temp_dir)) + + def test_storage_use_filename_obfuscation(self): + self.temp_backend.base_url = "https://acsone.eu/media" + self.temp_backend.use_as_default_for_attachments = True + self.temp_backend.use_filename_obfuscation = True + attachment = self.ir_attachment_model.create( + {"name": "test.txt", "raw": b"content"} + ) + self.env.flush_all() + self.assertTrue(attachment.store_fname) + self.assertEqual(attachment.name, "test.txt") + self.assertEqual(attachment.checksum, attachment.store_fname.split("/")[-1]) + self.assertEqual(attachment.checksum, attachment.fs_url.split("/")[-1]) + self.assertEqual(attachment.mimetype, "text/plain") diff --git a/fs_attachment/tests/test_fs_attachment_file_like_adapter.py b/fs_attachment/tests/test_fs_attachment_file_like_adapter.py new file mode 100644 index 0000000000..44ee875df4 --- /dev/null +++ b/fs_attachment/tests/test_fs_attachment_file_like_adapter.py @@ -0,0 +1,150 @@ +# Copyright 2023 ACSONE SA/NV (http://acsone.eu). +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). 
+
+from ..models.ir_attachment import AttachmentFileLikeAdapter
+from .common import MyException, TestFSAttachmentCommon
+
+
+class TestFSAttachmentFileLikeAdapterMixin:
+    @classmethod
+    def _create_attachment(cls):
+        raise NotImplementedError
+
+    @classmethod
+    def prepareClass(cls):
+        cls.initial_content = b"This is a test attachment"
+        cls.new_content = b"This is a new test attachment"
+
+    def prepare(self):
+        self.attachment = self._create_attachment()
+
+    def open(self, attachment=None, mode="rb", new_version=False, **kwargs):
+        return AttachmentFileLikeAdapter(
+            attachment or self.attachment,
+            mode=mode,
+            new_version=new_version,
+            **kwargs,
+        )
+
+    def test_read(self):
+        with self.open(mode="rb") as f:
+            self.assertEqual(f.read(), self.initial_content)
+
+    def test_write(self):
+        with self.open(mode="wb") as f:
+            f.write(self.new_content)
+        self.assertEqual(self.new_content, self.attachment.raw)
+
+    def test_write_append(self):
+        self.assertEqual(self.initial_content, self.attachment.raw)
+        with self.open(mode="ab") as f:
+            f.write(self.new_content)
+        self.assertEqual(self.initial_content + self.new_content, self.attachment.raw)
+
+    def test_write_new_version(self):
+        initial_fname = self.attachment.store_fname
+        with self.open(mode="wb", new_version=True) as f:
+            f.write(self.new_content)
+        self.assertEqual(self.new_content, self.attachment.raw)
+        if initial_fname:
+            self.assertNotEqual(self.attachment.store_fname, initial_fname)
+
+    def test_write_append_new_version(self):
+        initial_fname = self.attachment.store_fname
+        with self.open(mode="ab", new_version=True) as f:
+            f.write(self.new_content)
+        self.assertEqual(self.initial_content + self.new_content, self.attachment.raw)
+        if initial_fname:
+            self.assertNotEqual(self.attachment.store_fname, initial_fname)
+
+    def test_write_transactional_new_version_only(self):
+        try:
+            initial_fname = self.attachment.store_fname
+            with self.env.cr.savepoint():
+                with self.open(mode="wb", new_version=True) as f:
+                    f.write(self.new_content)
+                self.assertEqual(self.new_content, self.attachment.raw)
+                if initial_fname:
+                    self.assertNotEqual(self.attachment.store_fname, initial_fname)
+                raise MyException("Test")
+        except MyException:
+            ...
+
+        self.assertEqual(self.initial_content, self.attachment.raw)
+        if initial_fname:
+            self.assertEqual(self.attachment.store_fname, initial_fname)
+
+
+class TestAttachmentInFileSystemFileLikeAdapter(
+    TestFSAttachmentCommon, TestFSAttachmentFileLikeAdapterMixin
+):
+    @classmethod
+    def setUpClass(cls):
+        super().setUpClass()
+        cls.prepareClass()
+
+    def setUp(self):
+        super().setUp()
+        self.prepare()
+
+    @classmethod
+    def _create_attachment(cls):
+        return (
+            cls.env["ir.attachment"]
+            .with_context(
+                storage_location=cls.temp_backend.code,
+                storage_file_path="test.txt",
+            )
+            .create({"name": "test.txt", "raw": cls.initial_content})
+        )
+
+
+class TestAttachmentInDBFileLikeAdapter(
+    TestFSAttachmentCommon, TestFSAttachmentFileLikeAdapterMixin
+):
+    @classmethod
+    def setUpClass(cls):
+        super().setUpClass()
+        cls.prepareClass()
+
+    def setUp(self):
+        super().setUp()
+        self.env["ir.config_parameter"].sudo().set_param("ir_attachment.location", "db")
+        self.prepare()
+
+    def tearDown(self) -> None:
+        self.attachment.unlink()
+        super().tearDown()
+
+    @classmethod
+    def _create_attachment(cls):
+        return cls.env["ir.attachment"].create(
+            {"name": "test.txt", "raw": cls.initial_content}
+        )
+
+
+class TestAttachmentInFileFileLikeAdapter(
+    TestFSAttachmentCommon, TestFSAttachmentFileLikeAdapterMixin
+):
+    @classmethod
+    def setUpClass(cls):
+        super().setUpClass()
+        cls.prepareClass()
+
+    def setUp(self):
+        super().setUp()
+        self.env["ir.config_parameter"].sudo().set_param(
+            "ir_attachment.location", "file"
+        )
+        self.prepare()
+
+    def tearDown(self) -> None:
+        self.attachment.unlink()
+        self.attachment._gc_file_store_unsafe()
+        super().tearDown()
+
+    @classmethod
+    def _create_attachment(cls):
+        return cls.env["ir.attachment"].create(
+            {"name": "test.txt", "raw": cls.initial_content}
+        )
diff --git a/fs_attachment/tests/test_fs_attachment_internal_url.py b/fs_attachment/tests/test_fs_attachment_internal_url.py
new file mode 100644
index 0000000000..0dac94c72d
--- /dev/null
+++ b/fs_attachment/tests/test_fs_attachment_internal_url.py
@@ -0,0 +1,108 @@
+# Copyright 2023 ACSONE SA/NV (http://acsone.eu).
+# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
+import os
+import shutil
+import tempfile
+from unittest.mock import patch
+
+from odoo.tests.common import HttpCase
+from odoo.tools import config
+
+
+class TestFsAttachmentInternalUrl(HttpCase):
+    @classmethod
+    def setUpClass(cls):
+        super().setUpClass()
+        cls.env = cls.env(context=dict(cls.env.context, tracking_disable=True))
+        temp_dir = tempfile.mkdtemp()
+        cls.temp_backend = cls.env["fs.storage"].create(
+            {
+                "name": "Temp FS Storage",
+                "protocol": "file",
+                "code": "tmp_dir",
+                "directory_path": temp_dir,
+                "base_url": "http://my.public.files/",
+            }
+        )
+        cls.temp_dir = temp_dir
+        cls.gc_file_model = cls.env["fs.file.gc"]
+        cls.content = b"This is a test attachment"
+        cls.attachment = (
+            cls.env["ir.attachment"]
+            .with_context(
+                storage_location=cls.temp_backend.code,
+                storage_file_path="test.txt",
+            )
+            .create({"name": "test.txt", "raw": cls.content})
+        )
+
+        @cls.addClassCleanup
+        def cleanup_tempdir():
+            shutil.rmtree(temp_dir)
+
+    def setUp(self):
+        super().setUp()
+        # enforce temp_backend fields since they seem to be reset on
+        # savepoint rollback when managed by server_environment -> to be investigated
+        self.temp_backend.write(
+            {
+                "protocol": "file",
+                "code": "tmp_dir",
+                "directory_path": self.temp_dir,
+                "base_url": "http://my.public.files/",
+            }
+        )
+
+    @classmethod
+    def tearDownClass(cls):
+        super().tearDownClass()
+        for f in os.listdir(cls.temp_dir):
+            os.remove(os.path.join(cls.temp_dir, f))
+
+    def assertDownload(
+        self, url, headers, assert_status_code, assert_headers, assert_content=None
+    ):
+        res = self.url_open(url, headers=headers)
+        res.raise_for_status()
+        self.assertEqual(res.status_code, assert_status_code)
+        for header_name, header_value in assert_headers.items():
+            self.assertEqual(
+                res.headers.get(header_name),
+                header_value,
+                f"Wrong value for header {header_name}",
+            )
+        if assert_content:
+            self.assertEqual(res.content, assert_content, "Wrong content")
+        return res
+
+    def test_fs_attachment_internal_url(self):
+        self.authenticate("admin", "admin")
+        self.assertDownload(
+            self.attachment.internal_url,
+            headers={},
+            assert_status_code=200,
+            assert_headers={
+                "Content-Type": "text/plain; charset=utf-8",
+                "Content-Disposition": "inline; filename=test.txt",
+            },
+            assert_content=self.content,
+        )
+
+    def test_fs_attachment_internal_url_x_sendfile(self):
+        self.authenticate("admin", "admin")
+        self.temp_backend.write({"use_x_sendfile_to_serve_internal_url": True})
+        with patch.object(config, "options", {**config.options, "x_sendfile": True}):
+            x_accel_redirect = f"/tmp_dir/test-{self.attachment.id}-0.txt"
+            self.assertDownload(
+                self.attachment.internal_url,
+                headers={},
+                assert_status_code=200,
+                assert_headers={
+                    "Content-Type": "text/plain; charset=utf-8",
+                    "Content-Disposition": "inline; filename=test.txt",
+                    "X-Accel-Redirect": x_accel_redirect,
+                    "Content-Length": "0",
+                    "X-Sendfile": x_accel_redirect,
+                },
+                assert_content=None,
+            )
diff --git a/fs_attachment/views/fs_storage.xml b/fs_attachment/views/fs_storage.xml
new file mode 100644
index 0000000000..8754440670
--- /dev/null
+++ b/fs_attachment/views/fs_storage.xml
@@ -0,0 +1,31 @@
+<!-- fs.storage form view extension: "fs.storage.form (in fs_attachment)" on model
+     fs.storage (XML markup stripped in this excerpt) -->
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000..1d1ccedc2d
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+# generated from manifests external_dependencies
+python_slugify
diff --git a/setup/fs_attachment/odoo/addons/fs_attachment b/setup/fs_attachment/odoo/addons/fs_attachment
new file mode 120000
index 0000000000..9d55342885
--- /dev/null
+++ b/setup/fs_attachment/odoo/addons/fs_attachment
@@ -0,0 +1 @@
+../../../../fs_attachment
\ No newline at end of file
diff --git a/setup/fs_attachment/setup.py b/setup/fs_attachment/setup.py
new file mode 100644
index 0000000000..28c57bb640
--- /dev/null
+++ b/setup/fs_attachment/setup.py
@@ -0,0 +1,6 @@
+import setuptools
+
+setuptools.setup(
+    setup_requires=['setuptools-odoo'],
+    odoo_addon=True,
+)
diff --git a/test-requirements.txt b/test-requirements.txt
index 932a8957f7..fa87ce7ba6 100644
--- a/test-requirements.txt
+++ b/test-requirements.txt
@@ -1 +1,2 @@
 mock
+odoo-addon-fs-storage @ git+https://github.com/OCA/storage.git@refs/pull/252/head#subdirectory=setup/fs_storage