-- Migration: 026_DocumentContentHashes
-- Description: Create table for tracking document content hashes to prevent
--              duplicate processing.
--
-- Layout note: a `--` comment extends to the end of its line, so every
-- statement (and every column definition followed by an inline comment) must
-- sit on its own line; otherwise the DDL after the comment is never executed.
--
-- The script is safe to re-run: the table and indexes are created with
-- IF NOT EXISTS, and COMMENT ON simply overwrites the stored description.

CREATE TABLE IF NOT EXISTS document_content_hashes (
    -- Surrogate key, generated by the database when the writer omits it.
    -- NOTE(review): gen_random_uuid() is built in from PostgreSQL 13 onwards;
    -- older servers need the pgcrypto extension -- confirm the target version.
    id                UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    company_id        TEXT        NOT NULL,
    content_hash      TEXT        NOT NULL,  -- SHA-256 hash of file content
    original_filename TEXT        NOT NULL,
    -- The two references below are nullable and are not enforced by
    -- foreign keys in this migration.
    attachment_id     TEXT,                  -- Reference to attachment_read_models
    draft_id          TEXT,                  -- Reference to journal_entry_draft_read_models
    created_at        TIMESTAMPTZ NOT NULL DEFAULT NOW(),

    -- Ensure unique hash per company (same document can exist in different companies)
    CONSTRAINT uq_document_hash_company UNIQUE (company_id, content_hash)
);

-- Index for fast lookups by company.
-- NOTE(review): the index backing uq_document_hash_company already has
-- company_id as its leading column, so this single-column index is likely
-- redundant. It is kept so the set of objects created by this migration
-- stays unchanged; consider dropping it in a follow-up migration.
CREATE INDEX IF NOT EXISTS idx_document_content_hashes_company_id
    ON document_content_hashes (company_id);

-- Index for hash lookups that do not filter by company (the unique
-- constraint's index leads with company_id and cannot serve those efficiently).
CREATE INDEX IF NOT EXISTS idx_document_content_hashes_content_hash
    ON document_content_hashes (content_hash);

COMMENT ON TABLE document_content_hashes
    IS 'Tracks document content hashes to prevent duplicate processing of the same document';

COMMENT ON COLUMN document_content_hashes.content_hash
    IS 'SHA-256 hash of the file content';

COMMENT ON COLUMN document_content_hashes.draft_id
    IS 'Reference to the journal entry draft created from this document';